Commit 54db3874 authored by Sergey Vojtovich's avatar Sergey Vojtovich

MDEV-8682 - CSV engine does not properly process "", in quotes

Added a per-table boolean IETF_QUOTES variable to the CSV storage engine. It
allows enabling IETF-compatible parsing of embedded quote and comma characters.
It is disabled by default.

This patch is based on Percona revision:
https://github.com/percona/percona-server/commit/b32fbf02766f8db00f9ff4e6dc05a1df97c10ff7

Note that the original patch adds a server variable, while this patch adds a
per-table variable.
parent bca5894d
...@@ -5493,3 +5493,57 @@ RENAME TABLE t1 TO t2; ...@@ -5493,3 +5493,57 @@ RENAME TABLE t1 TO t2;
SELECT * FROM t2; SELECT * FROM t2;
a a
DROP TABLE t2; DROP TABLE t2;
#
# MDEV-8664 - plugins.show_all_plugins --embedded fails in buildbot
#
CREATE TABLE t1(c1 TEXT NOT NULL, c2 TEXT NOT NULL) ENGINE=CSV IETF_QUOTES=yes;
INSERT INTO t1 VALUES("a\"b,c","d");
INSERT INTO t1 VALUES("d","a\"b,c");
INSERT INTO t1 VALUES(",\"a","e");
INSERT INTO t1 VALUES("e",",\"a");
INSERT INTO t1 VALUES("\"","f");
INSERT INTO t1 VALUES("f","\"");
INSERT INTO t1 VALUES(",","g");
INSERT INTO t1 VALUES("g",",");
SELECT * FROM t1;
c1 c2
a"b,c d
d a"b,c
,"a e
e ,"a
" f
f "
, g
g ,
CSV file contents:
"a""b,c","d"
"d","a""b,c"
",""a","e"
"e",",""a"
"""","f"
"f",""""
",","g"
"g",","
DROP TABLE t1;
CREATE TABLE t1(c1 TEXT NOT NULL, c2 TEXT NOT NULL) ENGINE=CSV IETF_QUOTES=yes;
Replacing t1.CSV
SELECT * FROM t1;
c1 c2
a b
a b
a"b,c d
d ,"a
a", e
e "
, f
ALTER TABLE t1 IETF_QUOTES=no;
SELECT * FROM t1;
c1 c2
a b
a b
a"b,c d
d ,"a
a", e
e "
, f
DROP TABLE t1;
......
...@@ -1926,3 +1926,46 @@ move_file $MYSQLD_DATADIR/test/t1.CSV $MYSQLD_DATADIR/test/t2.CSV; ...@@ -1926,3 +1926,46 @@ move_file $MYSQLD_DATADIR/test/t1.CSV $MYSQLD_DATADIR/test/t2.CSV;
RENAME TABLE t1 TO t2; RENAME TABLE t1 TO t2;
SELECT * FROM t2; SELECT * FROM t2;
DROP TABLE t2; DROP TABLE t2;
--echo #
--echo # MDEV-8664 - plugins.show_all_plugins --embedded fails in buildbot
--echo #
CREATE TABLE t1(c1 TEXT NOT NULL, c2 TEXT NOT NULL) ENGINE=CSV IETF_QUOTES=yes;
INSERT INTO t1 VALUES("a\"b,c","d");
INSERT INTO t1 VALUES("d","a\"b,c");
INSERT INTO t1 VALUES(",\"a","e");
INSERT INTO t1 VALUES("e",",\"a");
INSERT INTO t1 VALUES("\"","f");
INSERT INTO t1 VALUES("f","\"");
INSERT INTO t1 VALUES(",","g");
INSERT INTO t1 VALUES("g",",");
SELECT * FROM t1;
--echo CSV file contents:
--cat_file $MYSQLD_DATADIR/test/t1.CSV
DROP TABLE t1;
CREATE TABLE t1(c1 TEXT NOT NULL, c2 TEXT NOT NULL) ENGINE=CSV IETF_QUOTES=yes;
--echo Replacing t1.CSV
--remove_file $MYSQLD_DATADIR/test/t1.CSV
--write_file $MYSQLD_DATADIR/test/t1.CSV
a,b
"a","b"
"a""b,c","d"
"d",",""a"
"a"",",e
e,""""
",",f
EOF
SELECT * FROM t1;
ALTER TABLE t1 IETF_QUOTES=no;
SELECT * FROM t1;
DROP TABLE t1;
...@@ -61,6 +61,16 @@ TODO: ...@@ -61,6 +61,16 @@ TODO:
#define CSN_EXT ".CSN" // Files used during repair and update #define CSN_EXT ".CSN" // Files used during repair and update
#define CSM_EXT ".CSM" // Meta file #define CSM_EXT ".CSM" // Meta file
/*
  Per-table options of the CSV engine. The values are read through
  table_share->option_struct (see encode_quote and find_current_row).
*/
struct ha_table_option_struct
{
/*
  When true, a double quote embedded in a field is written as "" instead of
  \" and, on read, "" inside a quoted field is taken as one literal quote.
*/
bool ietf_quotes;
};
/*
  Table options accepted by the CSV engine in CREATE/ALTER TABLE. The list is
  published to the server by tina_init_func via tina_hton->table_options.
*/
ha_create_table_option csv_table_option_list[]=
{
/*
  Boolean IETF_QUOTES option, stored in ha_table_option_struct::ietf_quotes.
  NOTE(review): the trailing 0 is presumably the default value (option
  disabled) -- confirm against the HA_TOPTION_BOOL definition.
*/
HA_TOPTION_BOOL("IETF_QUOTES", ietf_quotes, 0),
HA_TOPTION_END
};
static TINA_SHARE *get_share(const char *table_name, TABLE *table); static TINA_SHARE *get_share(const char *table_name, TABLE *table);
static int free_share(TINA_SHARE *share); static int free_share(TINA_SHARE *share);
...@@ -164,6 +174,7 @@ static int tina_init_func(void *p) ...@@ -164,6 +174,7 @@ static int tina_init_func(void *p)
tina_hton->flags= (HTON_CAN_RECREATE | HTON_SUPPORT_LOG_TABLES | tina_hton->flags= (HTON_CAN_RECREATE | HTON_SUPPORT_LOG_TABLES |
HTON_NO_PARTITION); HTON_NO_PARTITION);
tina_hton->tablefile_extensions= ha_tina_exts; tina_hton->tablefile_extensions= ha_tina_exts;
tina_hton->table_options= csv_table_option_list;
return 0; return 0;
} }
...@@ -513,7 +524,7 @@ int ha_tina::encode_quote(uchar *buf) ...@@ -513,7 +524,7 @@ int ha_tina::encode_quote(uchar *buf)
char attribute_buffer[1024]; char attribute_buffer[1024];
String attribute(attribute_buffer, sizeof(attribute_buffer), String attribute(attribute_buffer, sizeof(attribute_buffer),
&my_charset_bin); &my_charset_bin);
bool ietf_quotes= table_share->option_struct->ietf_quotes;
my_bitmap_map *org_bitmap= dbug_tmp_use_all_columns(table, table->read_set); my_bitmap_map *org_bitmap= dbug_tmp_use_all_columns(table, table->read_set);
buffer.length(0); buffer.length(0);
...@@ -556,7 +567,7 @@ int ha_tina::encode_quote(uchar *buf) ...@@ -556,7 +567,7 @@ int ha_tina::encode_quote(uchar *buf)
{ {
if (*ptr == '"') if (*ptr == '"')
{ {
buffer.append('\\'); buffer.append(ietf_quotes ? '"' : '\\');
buffer.append('"'); buffer.append('"');
} }
else if (*ptr == '\r') else if (*ptr == '\r')
...@@ -648,6 +659,7 @@ int ha_tina::find_current_row(uchar *buf) ...@@ -648,6 +659,7 @@ int ha_tina::find_current_row(uchar *buf)
my_bitmap_map *org_bitmap; my_bitmap_map *org_bitmap;
int error; int error;
bool read_all; bool read_all;
bool ietf_quotes= table_share->option_struct->ietf_quotes;
DBUG_ENTER("ha_tina::find_current_row"); DBUG_ENTER("ha_tina::find_current_row");
free_root(&blobroot, MYF(0)); free_root(&blobroot, MYF(0));
...@@ -681,8 +693,10 @@ int ha_tina::find_current_row(uchar *buf) ...@@ -681,8 +693,10 @@ int ha_tina::find_current_row(uchar *buf)
a) If end of current field is reached, move a) If end of current field is reached, move
to next field and jump to step 2.3 to next field and jump to step 2.3
b) If current character is a \\ handle b) If current character is a \\ handle
\\n, \\r, \\, \\" \\n, \\r, \\, and \\" if not in ietf_quotes mode
c) else append the current character into the buffer c) if in ietf_quotes mode and the current character is
a ", handle ""
d) else append the current character into the buffer
before checking that EOL has not been reached. before checking that EOL has not been reached.
2.2) If the current character does not begin with a quote 2.2) If the current character does not begin with a quote
2.2.1) Until EOL has not been reached 2.2.1) Until EOL has not been reached
...@@ -723,15 +737,25 @@ int ha_tina::find_current_row(uchar *buf) ...@@ -723,15 +737,25 @@ int ha_tina::find_current_row(uchar *buf)
curr_offset+= 2; curr_offset+= 2;
break; break;
} }
if (curr_char == '\\' && curr_offset != (end_offset - 1)) if (ietf_quotes && curr_char == '"'
&& file_buff->get_value(curr_offset + 1) == '"')
{
/* Embedded IETF quote */
curr_offset++;
buffer.append('"');
}
else if (curr_char == '\\' && curr_offset != (end_offset - 1))
{ {
/* A quote followed by something else than a comma, end of line, or
(in IETF mode) another quote will be handled as a regular
character. */
curr_offset++; curr_offset++;
curr_char= file_buff->get_value(curr_offset); curr_char= file_buff->get_value(curr_offset);
if (curr_char == 'r') if (curr_char == 'r')
buffer.append('\r'); buffer.append('\r');
else if (curr_char == 'n' ) else if (curr_char == 'n' )
buffer.append('\n'); buffer.append('\n');
else if (curr_char == '\\' || curr_char == '"') else if (curr_char == '\\' || (!ietf_quotes && curr_char == '"'))
buffer.append(curr_char); buffer.append(curr_char);
else /* This could only happed with an externally created file */ else /* This could only happed with an externally created file */
{ {
...@@ -1744,6 +1768,10 @@ int ha_tina::reset(void) ...@@ -1744,6 +1768,10 @@ int ha_tina::reset(void)
/*
  Report whether the requested table definition is compatible with the
  existing table data.

  Returns COMPATIBLE_DATA_NO when the requested IETF_QUOTES value differs
  from the one the table currently uses, COMPATIBLE_DATA_YES otherwise.
*/
bool ha_tina::check_if_incompatible_data(HA_CREATE_INFO *info_arg, bool ha_tina::check_if_incompatible_data(HA_CREATE_INFO *info_arg,
uint table_changes) uint table_changes)
{ {
/*
  The option changes how embedded quotes are encoded in the data file, so a
  differing value is reported as incompatible.
  NOTE(review): presumably this makes the server rewrite the table on
  ALTER TABLE -- confirm against the handler API.
*/
if (info_arg->option_struct->ietf_quotes !=
table_share->option_struct->ietf_quotes)
return COMPATIBLE_DATA_NO;
return COMPATIBLE_DATA_YES; return COMPATIBLE_DATA_YES;
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment