Commit f9e5f237 authored by Alexander Barkov

MDEV-6027 RLIKE: "." no longer matching new line

Added a new system variable:
default_regex_flags='DOTALL,DUPNAMES,EXTENDED,EXTRA,MULTILINE,UNGREEDY'
parent bcf16fa6
...@@ -754,3 +754,88 @@ DROP TABLE t1; ...@@ -754,3 +754,88 @@ DROP TABLE t1;
SELECT REGEXP_SUBSTR('See https://mariadb.org/en/foundation/ for details', 'https?://[^/]*'); SELECT REGEXP_SUBSTR('See https://mariadb.org/en/foundation/ for details', 'https?://[^/]*');
REGEXP_SUBSTR('See https://mariadb.org/en/foundation/ for details', 'https?://[^/]*') REGEXP_SUBSTR('See https://mariadb.org/en/foundation/ for details', 'https?://[^/]*')
https://mariadb.org https://mariadb.org
#
# MDEV-6027 RLIKE: "." no longer matching new line
#
SELECT 'cat and\ndog' RLIKE 'cat.*dog';
'cat and\ndog' RLIKE 'cat.*dog'
0
SELECT 'cat and\r\ndog' RLIKE 'cat.*dog';
'cat and\r\ndog' RLIKE 'cat.*dog'
0
SELECT 'a\nb' RLIKE 'a.b';
'a\nb' RLIKE 'a.b'
0
SELECT 'a\nb' RLIKE '(?-s)a.b';
'a\nb' RLIKE '(?-s)a.b'
0
SET default_regex_flags='DOTALL';
SELECT @@default_regex_flags;
@@default_regex_flags
DOTALL
SELECT 'cat and\ndog' RLIKE 'cat.*dog';
'cat and\ndog' RLIKE 'cat.*dog'
1
SELECT 'cat and\r\ndog' RLIKE 'cat.*dog';
'cat and\r\ndog' RLIKE 'cat.*dog'
1
SELECT 'a\nb' RLIKE 'a.b';
'a\nb' RLIKE 'a.b'
1
SELECT 'a\nb' RLIKE '(?-s)a.b';
'a\nb' RLIKE '(?-s)a.b'
0
SET default_regex_flags=DEFAULT;
SELECT REGEXP_SUBSTR('Monday Mon','^((?<DN>Mon|Fri|Sun)day|(?<DN>Tue)sday).*(?P=DN)$');
ERROR 42000: Got error 'two named subpatterns have the same name at offset 29' from regexp
SET default_regex_flags='DUPNAMES';
SELECT REGEXP_SUBSTR('Monday Mon','^((?<DN>Mon|Fri|Sun)day|(?<DN>Tue)sday).*(?P=DN)$');
REGEXP_SUBSTR('Monday Mon','^((?<DN>Mon|Fri|Sun)day|(?<DN>Tue)sday).*(?P=DN)$')
Monday Mon
SELECT REGEXP_SUBSTR('Tuesday Tue','^((?<DN>Mon|Fri|Sun)day|(?<DN>Tue)sday).*(?P=DN)$');
REGEXP_SUBSTR('Tuesday Tue','^((?<DN>Mon|Fri|Sun)day|(?<DN>Tue)sday).*(?P=DN)$')
Tuesday Tue
SET default_regex_flags=DEFAULT;
SELECT 'AB' RLIKE 'A B';
'AB' RLIKE 'A B'
0
SELECT 'AB' RLIKE 'A# this is a comment\nB';
'AB' RLIKE 'A# this is a comment\nB'
0
SET default_regex_flags='EXTENDED';
SELECT 'AB' RLIKE 'A B';
'AB' RLIKE 'A B'
1
SELECT 'AB' RLIKE 'A# this is a comment\nB';
'AB' RLIKE 'A# this is a comment\nB'
1
SET default_regex_flags=DEFAULT;
SELECT 'Aq' RLIKE 'A\\q';
'Aq' RLIKE 'A\\q'
1
SET default_regex_flags='EXTRA';
SELECT 'Aq' RLIKE 'A\\q';
ERROR 42000: Got error 'unrecognized character follows \ at offset 2' from regexp
SET default_regex_flags=DEFAULT;
SELECT 'a\nb\nc' RLIKE '^b$';
'a\nb\nc' RLIKE '^b$'
0
SET default_regex_flags='MULTILINE';
SELECT 'a\nb\nc' RLIKE '^b$';
'a\nb\nc' RLIKE '^b$'
1
SET default_regex_flags=DEFAULT;
SELECT REGEXP_SUBSTR('abc','.+');
REGEXP_SUBSTR('abc','.+')
abc
SELECT REGEXP_REPLACE('abc','^(.*)(.*)$','\\1/\\2');
REGEXP_REPLACE('abc','^(.*)(.*)$','\\1/\\2')
abc/
SET default_regex_flags='UNGREEDY';
SELECT REGEXP_SUBSTR('abc','.+');
REGEXP_SUBSTR('abc','.+')
a
SELECT REGEXP_REPLACE('abc','^(.*)(.*)$','\\1/\\2');
REGEXP_REPLACE('abc','^(.*)(.*)$','\\1/\\2')
/abc
SET default_regex_flags=DEFAULT;
......
...@@ -136,6 +136,10 @@ The following options may be given as the first argument: ...@@ -136,6 +136,10 @@ The following options may be given as the first argument:
--deadlock-timeout-short=# --deadlock-timeout-short=#
Short timeout for the two-step deadlock detection (in Short timeout for the two-step deadlock detection (in
microseconds) microseconds)
--default-regex-flags=name
Default flags for the regex library. Syntax:
default-regex-flags='[flag[,flag[,flag...]]]'. See the
manual for the complete list of valid flags
--default-storage-engine=name --default-storage-engine=name
The default storage engine for new tables The default storage engine for new tables
--default-time-zone=name --default-time-zone=name
...@@ -1079,6 +1083,7 @@ deadlock-search-depth-long 15 ...@@ -1079,6 +1083,7 @@ deadlock-search-depth-long 15
deadlock-search-depth-short 4 deadlock-search-depth-short 4
deadlock-timeout-long 50000000 deadlock-timeout-long 50000000
deadlock-timeout-short 10000 deadlock-timeout-short 10000
default-regex-flags
default-storage-engine myisam default-storage-engine myisam
default-time-zone (No default value) default-time-zone (No default value)
default-week-format 0 default-week-format 0
......
SET default_regex_flags='';
SELECT @@default_regex_flags;
@@default_regex_flags
SET default_regex_flags=DEFAULT;
SELECT @@default_regex_flags;
@@default_regex_flags
SET default_regex_flags=NULL;
ERROR 42000: Variable 'default_regex_flags' can't be set to the value of 'NULL'
SELECT @@default_regex_flags;
@@default_regex_flags
SET default_regex_flags='UNKNOWN';
ERROR 42000: Variable 'default_regex_flags' can't be set to the value of 'UNKNOWN'
SET default_regex_flags=123;
ERROR 42000: Variable 'default_regex_flags' can't be set to the value of '123'
SET default_regex_flags=123.0;
ERROR 42000: Incorrect argument type to variable 'default_regex_flags'
SET default_regex_flags=123e0;
ERROR 42000: Incorrect argument type to variable 'default_regex_flags'
SET default_regex_flags='DOTALL';
SELECT @@default_regex_flags;
@@default_regex_flags
DOTALL
SET default_regex_flags=NULL;
ERROR 42000: Variable 'default_regex_flags' can't be set to the value of 'NULL'
SELECT @@default_regex_flags;
@@default_regex_flags
DOTALL
SET @@default_regex_flags=63;
SELECT @@default_regex_flags;
@@default_regex_flags
DOTALL,DUPNAMES,EXTENDED,EXTRA,MULTILINE,UNGREEDY
SET @@default_regex_flags='DOTALL';
SELECT @@default_regex_flags;
@@default_regex_flags
DOTALL
SET @@default_regex_flags='DOTALL,DUPNAMES,EXTENDED,EXTRA,MULTILINE,UNGREEDY';
SELECT @@default_regex_flags;
@@default_regex_flags
DOTALL,DUPNAMES,EXTENDED,EXTRA,MULTILINE,UNGREEDY
SET @@default_regex_flags=DEFAULT;
SET @@global.default_regex_flags='MULTILINE';
SELECT @@session.default_regex_flags;
@@session.default_regex_flags
# connection con1
SELECT @@session.default_regex_flags;
@@session.default_regex_flags
MULTILINE
# connection default
SELECT @@session.default_regex_flags;
@@session.default_regex_flags
SET @@global.default_regex_flags=DEFAULT;
SET default_regex_flags=DEFAULT;
# Basic checks for the default_regex_flags system variable.

# An empty flag list and DEFAULT are both accepted.
SET default_regex_flags='';
SELECT @@default_regex_flags;
SET default_regex_flags=DEFAULT;
SELECT @@default_regex_flags;

# NULL is rejected; the following SELECT shows the value left in place.
--error ER_WRONG_VALUE_FOR_VAR
SET default_regex_flags=NULL;
SELECT @@default_regex_flags;

# Unknown flag names and out-of-range integers are wrong values;
# decimal and floating point arguments are wrong types.
--error ER_WRONG_VALUE_FOR_VAR
SET default_regex_flags='UNKNOWN';
--error ER_WRONG_VALUE_FOR_VAR
SET default_regex_flags=123;
--error ER_WRONG_TYPE_FOR_VAR
SET default_regex_flags=123.0;
--error ER_WRONG_TYPE_FOR_VAR
SET default_regex_flags=123e0;

# A failed assignment must not disturb a previously set value.
SET default_regex_flags='DOTALL';
SELECT @@default_regex_flags;
--error ER_WRONG_VALUE_FOR_VAR
SET default_regex_flags=NULL;
SELECT @@default_regex_flags;

# The integer form is a bit mask: 63 turns on all six flags.
SET @@default_regex_flags=63;
SELECT @@default_regex_flags;
SET @@default_regex_flags='DOTALL';
SELECT @@default_regex_flags;
SET @@default_regex_flags='DOTALL,DUPNAMES,EXTENDED,EXTRA,MULTILINE,UNGREEDY';
SELECT @@default_regex_flags;
SET @@default_regex_flags=DEFAULT;

# Changing the GLOBAL value leaves the current session untouched,
# but a connection opened afterwards starts with the new value.
SET @@global.default_regex_flags='MULTILINE';
SELECT @@session.default_regex_flags;
connect (con1,localhost,root,,);
--echo # connection con1
connection con1;
SELECT @@session.default_regex_flags;
connection default;
--echo # connection default
disconnect con1;
SELECT @@session.default_regex_flags;

# Restore both scopes so later tests see the defaults.
SET @@global.default_regex_flags=DEFAULT;
SET default_regex_flags=DEFAULT;
...@@ -349,3 +349,51 @@ DROP TABLE t1; ...@@ -349,3 +349,51 @@ DROP TABLE t1;
SELECT REGEXP_SUBSTR('See https://mariadb.org/en/foundation/ for details', 'https?://[^/]*'); SELECT REGEXP_SUBSTR('See https://mariadb.org/en/foundation/ for details', 'https?://[^/]*');
--echo #
--echo # MDEV-6027 RLIKE: "." no longer matching new line
--echo #
# Each section runs the same statements first with the default (empty)
# flag set and then with exactly one flag enabled, to show its effect.

# DOTALL: by default "." does not match a newline; with DOTALL it does,
# unless the pattern switches it off again with an inline (?-s).
SELECT 'cat and\ndog' RLIKE 'cat.*dog';
SELECT 'cat and\r\ndog' RLIKE 'cat.*dog';
SELECT 'a\nb' RLIKE 'a.b';
SELECT 'a\nb' RLIKE '(?-s)a.b';
SET default_regex_flags='DOTALL';
SELECT @@default_regex_flags;
SELECT 'cat and\ndog' RLIKE 'cat.*dog';
SELECT 'cat and\r\ndog' RLIKE 'cat.*dog';
SELECT 'a\nb' RLIKE 'a.b';
SELECT 'a\nb' RLIKE '(?-s)a.b';
SET default_regex_flags=DEFAULT;

# DUPNAMES: two subpatterns sharing the name DN are an error by default
# and are accepted once DUPNAMES is set.
--error ER_REGEXP_ERROR
SELECT REGEXP_SUBSTR('Monday Mon','^((?<DN>Mon|Fri|Sun)day|(?<DN>Tue)sday).*(?P=DN)$');
SET default_regex_flags='DUPNAMES';
SELECT REGEXP_SUBSTR('Monday Mon','^((?<DN>Mon|Fri|Sun)day|(?<DN>Tue)sday).*(?P=DN)$');
SELECT REGEXP_SUBSTR('Tuesday Tue','^((?<DN>Mon|Fri|Sun)day|(?<DN>Tue)sday).*(?P=DN)$');
SET default_regex_flags=DEFAULT;

# EXTENDED: white space and "#" comments inside the pattern are ignored.
SELECT 'AB' RLIKE 'A B';
SELECT 'AB' RLIKE 'A# this is a comment\nB';
SET default_regex_flags='EXTENDED';
SELECT 'AB' RLIKE 'A B';
SELECT 'AB' RLIKE 'A# this is a comment\nB';
SET default_regex_flags=DEFAULT;

# EXTRA: an unrecognized escape such as \q matches literally by default
# and becomes a pattern error with EXTRA.
SELECT 'Aq' RLIKE 'A\\q';
SET default_regex_flags='EXTRA';
--error ER_REGEXP_ERROR
SELECT 'Aq' RLIKE 'A\\q';
SET default_regex_flags=DEFAULT;

# MULTILINE: "^" and "$" also match at newlines inside the subject.
SELECT 'a\nb\nc' RLIKE '^b$';
SET default_regex_flags='MULTILINE';
SELECT 'a\nb\nc' RLIKE '^b$';
SET default_regex_flags=DEFAULT;

# UNGREEDY: quantifiers match as little as possible instead of as much
# as possible.
SELECT REGEXP_SUBSTR('abc','.+');
SELECT REGEXP_REPLACE('abc','^(.*)(.*)$','\\1/\\2');
SET default_regex_flags='UNGREEDY';
SELECT REGEXP_SUBSTR('abc','.+');
SELECT REGEXP_REPLACE('abc','^(.*)(.*)$','\\1/\\2');
SET default_regex_flags=DEFAULT;
...@@ -5055,6 +5055,11 @@ bool Item_func_like::find_selective_predicates_list_processor(uchar *arg) ...@@ -5055,6 +5055,11 @@ bool Item_func_like::find_selective_predicates_list_processor(uchar *arg)
} }
int Regexp_processor_pcre::default_regex_flags()
{
return default_regex_flags_pcre(current_thd);
}
/** /**
Convert string to lib_charset, if needed. Convert string to lib_charset, if needed.
......
...@@ -1513,9 +1513,10 @@ class Regexp_processor_pcre ...@@ -1513,9 +1513,10 @@ class Regexp_processor_pcre
m_library_charset(&my_charset_utf8_general_ci), m_library_charset(&my_charset_utf8_general_ci),
m_subpatterns_needed(0) m_subpatterns_needed(0)
{} {}
int default_regex_flags();
void init(CHARSET_INFO *data_charset, int extra_flags, uint nsubpatterns) void init(CHARSET_INFO *data_charset, int extra_flags, uint nsubpatterns)
{ {
m_library_flags= extra_flags | m_library_flags= default_regex_flags() | extra_flags |
(data_charset != &my_charset_bin ? (data_charset != &my_charset_bin ?
(PCRE_UTF8 | PCRE_UCP) : 0) | (PCRE_UTF8 | PCRE_UCP) : 0) |
((data_charset->state & ((data_charset->state &
......
...@@ -337,6 +337,7 @@ bool fix_delay_key_write(sys_var *self, THD *thd, enum_var_type type); ...@@ -337,6 +337,7 @@ bool fix_delay_key_write(sys_var *self, THD *thd, enum_var_type type);
ulonglong expand_sql_mode(ulonglong sql_mode); ulonglong expand_sql_mode(ulonglong sql_mode);
bool sql_mode_string_representation(THD *thd, ulonglong sql_mode, LEX_STRING *ls); bool sql_mode_string_representation(THD *thd, ulonglong sql_mode, LEX_STRING *ls);
int default_regex_flags_pcre(const THD *thd);
extern sys_var *Sys_autocommit_ptr; extern sys_var *Sys_autocommit_ptr;
......
...@@ -516,6 +516,7 @@ typedef struct system_variables ...@@ -516,6 +516,7 @@ typedef struct system_variables
ulonglong join_buff_size; ulonglong join_buff_size;
ulonglong sortbuff_size; ulonglong sortbuff_size;
ulonglong group_concat_max_len; ulonglong group_concat_max_len;
ulonglong default_regex_flags;
ha_rows select_limit; ha_rows select_limit;
ha_rows max_join_size; ha_rows max_join_size;
ha_rows expensive_subquery_limit; ha_rows expensive_subquery_limit;
......
...@@ -4568,6 +4568,46 @@ static Sys_var_set Sys_log_slow_filter( ...@@ -4568,6 +4568,46 @@ static Sys_var_set Sys_log_slow_filter(
log_slow_filter_names, log_slow_filter_names,
DEFAULT(MAX_SET(array_elements(log_slow_filter_names)-1))); DEFAULT(MAX_SET(array_elements(log_slow_filter_names)-1)));
/*
  Flag names accepted by the default_regex_flags system variable,
  terminated by a 0 entry.
  The position of a name in this list is the bit number that
  default_regex_flags_pcre() tests for it, so the order here must stay
  in step with default_regex_flags_to_pcre[].
*/
static const char *default_regex_flags_names[]=
{
"DOTALL", // (?s) . matches anything including NL
"DUPNAMES", // (?J) Allow duplicate names for subpatterns
"EXTENDED", // (?x) Ignore white space and # comments
"EXTRA", // (?X) extra features (e.g. error on unknown escape character)
"MULTILINE", // (?m) ^ and $ match newlines within data
"UNGREEDY", // (?U) Invert greediness of quantifiers
0
};
/*
  PCRE option constant for each flag, listed in the same order as
  default_regex_flags_names[]: entry i is the option selected by bit i
  of the variable value.
  The trailing 0 is the end marker that default_regex_flags_pcre()
  stops at.
*/
static const int default_regex_flags_to_pcre[]=
{
PCRE_DOTALL,
PCRE_DUPNAMES,
PCRE_EXTENDED,
PCRE_EXTRA,
PCRE_MULTILINE,
PCRE_UNGREEDY,
0
};
/**
  Translate the default_regex_flags value of a thread into PCRE options.

  Bit i of thd->variables.default_regex_flags selects
  default_regex_flags_to_pcre[i]. The table is scanned up to its
  terminating 0 entry.

  @param thd  Thread whose system variables are read.
  @return     Bitwise OR of the PCRE option constants of all flags
              that are set; 0 if none is set.
*/
int default_regex_flags_pcre(const THD *thd)
{
  ulonglong src= thd->variables.default_regex_flags;
  int res= 0;
  for (int i= 0; default_regex_flags_to_pcre[i]; i++)
  {
    /*
      Build the mask as a 64-bit value so that it has the same width as
      src and the shift stays well defined for any bit position of the
      variable.
    */
    if (src & (1ULL << i))
      res|= default_regex_flags_to_pcre[i];
  }
  return res;
}
/*
  Definition of the default_regex_flags system variable.
  The value is a set built from default_regex_flags_names and is kept in
  the default_regex_flags member of the session variables. It can also
  be given on the command line, where an argument is required.
  The default of 0 means that no flag is set.
*/
static Sys_var_set Sys_default_regex_flags(
"default_regex_flags",
"Default flags for the regex library. "
"Syntax: default-regex-flags='[flag[,flag[,flag...]]]'. "
"See the manual for the complete list of valid flags",
SESSION_VAR(default_regex_flags), CMD_LINE(REQUIRED_ARG),
default_regex_flags_names,
DEFAULT(0));
static Sys_var_ulong Sys_log_slow_rate_limit( static Sys_var_ulong Sys_log_slow_rate_limit(
"log_slow_rate_limit", "log_slow_rate_limit",
"Write to slow log every #th slow query. Set to 1 to log everything. " "Write to slow log every #th slow query. Set to 1 to log everything. "
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment