set @subselect_mat_cost=@@optimizer_switch;
set optimizer_switch='mrr=on,mrr_sort_keys=on,index_condition_pushdown=on';
TEST GROUP 1:
Typical cases of in-to-exists and materialization subquery strategies
=====================================================================
drop database if exists world;
set names utf8;
create database world;
use world;
CREATE TABLE Country (
Code char(3) NOT NULL default '',
Name char(52) NOT NULL default '',
SurfaceArea float(10,2) NOT NULL default '0.00',
Population int(11) NOT NULL default '0',
Capital int(11) default NULL,
PRIMARY KEY  (Code),
UNIQUE INDEX (Name)
);
CREATE TABLE City (
ID int(11) NOT NULL auto_increment,
Name char(35) NOT NULL default '',
Country char(3) NOT NULL default '',
Population int(11) NOT NULL default '0',
PRIMARY KEY  (ID),
INDEX (Population),
INDEX (Country) 
);
CREATE TABLE CountryLanguage (
Country char(3) NOT NULL default '',
Language char(30) NOT NULL default '',
Percentage float(3,1) NOT NULL default '0.0',
PRIMARY KEY  (Country, Language),
INDEX (Percentage)
);
Make the schema and data more diverse by adding more indexes, nullable
columns, and NULL data.
create index SurfaceArea on Country(SurfaceArea);
create index Language on CountryLanguage(Language);
create index CityName on City(Name);
alter table City change population population int(11) null default 0;
select max(id) from City into @max_city_id;
insert into City values (@max_city_id + 1,'Kilifarevo','BGR',NULL);
SELECT COUNT(*) FROM Country;
COUNT(*)
239
SELECT COUNT(*) FROM City;
COUNT(*)
4080
SELECT COUNT(*) FROM CountryLanguage;
COUNT(*)
984
set @@optimizer_switch = 'in_to_exists=on,semijoin=on,materialization=on,partial_match_rowid_merge=on,partial_match_table_scan=on,subquery_cache=on';

1. Subquery in a disjunctive WHERE clause of the outer query.


Q1.1m:
MATERIALIZATION: there are too many rows in the outer query
to be looked up in the inner table.
EXPLAIN
SELECT Name FROM Country
WHERE (Code IN (select Country from City where City.Population > 100000) OR
Name LIKE 'L%') AND
surfacearea > 1000000;
id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
1	PRIMARY	Country	ALL	Name,SurfaceArea	NULL	NULL	NULL	239	Using where
2	SUBQUERY	City	ALL	Population,Country	NULL	NULL	NULL	4080	Using where
SELECT Name FROM Country
WHERE (Code IN (select Country from City where City.Population > 100000) OR
Name LIKE 'L%') AND
surfacearea > 1000000;
Name
Algeria
Angola
Argentina
Australia
Bolivia
Brazil
Egypt
South Africa
Ethiopia
Indonesia
India
Iran
Canada
Kazakstan
China
Colombia
Congo, The Democratic Republic of the
Libyan Arab Jamahiriya
Mali
Mauritania
Mexico
Mongolia
Niger
Peru
Saudi Arabia
Sudan
Chad
Russian Federation
United States
Q1.1e:
IN-EXISTS: the materialization cost is the same as above, but
there are much fewer outer rows to be looked up, thus the
materialization cost is too high to compensate for fast lookups.
EXPLAIN
SELECT Name FROM Country
WHERE (Code IN (select Country from City where City.Population > 100000) OR
Name LIKE 'L%') AND
surfacearea > 10*1000000;
id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
1	PRIMARY	Country	range	Name,SurfaceArea	SurfaceArea	4	NULL	5	Using index condition; Using where; Rowid-ordered scan
2	DEPENDENT SUBQUERY	City	index_subquery	Population,Country	Country	3	func	18	Using where
SELECT Name FROM Country
WHERE (Code IN (select Country from City where City.Population > 100000) OR
Name LIKE 'L%') AND
surfacearea > 10*1000000;
Name
Russian Federation

Q1.2m:
MATERIALIZATION: the IN predicate is pushed (attached) to the last table
in the join order (Country, City), therefore there are too many row
combinations to filter by re-executing the subquery for each combination.
EXPLAIN
SELECT *
FROM Country, City
WHERE City.Country = Country.Code AND
Country.SurfaceArea < 3000 AND Country.SurfaceArea > 10 AND
(City.Name IN
(select Language from CountryLanguage where Percentage > 50) OR
City.name LIKE '%Island%');
id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
1	PRIMARY	Country	ALL	PRIMARY,SurfaceArea	NULL	NULL	NULL	239	Using where
1	PRIMARY	City	ref	Country	Country	3	world.Country.Code	18	Using where
2	SUBQUERY	CountryLanguage	ALL	Percentage,Language	NULL	NULL	NULL	984	Using where
SELECT *
FROM Country, City
WHERE City.Country = Country.Code AND
Country.SurfaceArea < 3000 AND Country.SurfaceArea > 10 AND
(City.Name IN
(select Language from CountryLanguage where Percentage > 50) OR
City.name LIKE '%Island%');
Code	Name	SurfaceArea	Population	Capital	ID	Name	Country	population
CCK	Cocos (Keeling) Islands	14.00	600	2317	2317	West Island	CCK	167
Q1.2e:
IN_EXISTS: join order is the same, but the left IN operand refers to
only the first table in the join order (Country), so there are much
fewer rows to filter by subquery re-execution.
EXPLAIN
SELECT *
FROM Country, City
WHERE City.Country = Country.Code AND
Country.SurfaceArea < 3000 AND Country.SurfaceArea > 10 AND
(Country.Name IN
(select Language from CountryLanguage where Percentage > 50) OR
Country.name LIKE '%Island%');
id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
1	PRIMARY	Country	ALL	PRIMARY,SurfaceArea	NULL	NULL	NULL	239	Using where
1	PRIMARY	City	ref	Country	Country	3	world.Country.Code	18	
2	DEPENDENT SUBQUERY	CountryLanguage	index_subquery	Percentage,Language	Language	30	func	2	Using where
SELECT *
FROM Country, City
WHERE City.Country = Country.Code AND
Country.SurfaceArea < 3000 AND Country.SurfaceArea > 10 AND
(Country.Name IN
(select Language from CountryLanguage where Percentage > 50) OR
Country.name LIKE '%Island%');
Code	Name	SurfaceArea	Population	Capital	ID	Name	Country	population
VGB	Virgin Islands, British	151.00	21000	537	537	Road Town	VGB	8000
CYM	Cayman Islands	264.00	38000	553	553	George Town	CYM	19600
COK	Cook Islands	236.00	20000	583	583	Avarua	COK	11900
FRO	Faroe Islands	1399.00	43000	901	901	Tórshavn	FRO	14542
CXR	Christmas Island	135.00	2500	1791	1791	Flying Fish Cove	CXR	700
KIR	Kiribati	726.00	83000	2256	2255	Bikenibeu	KIR	5055
KIR	Kiribati	726.00	83000	2256	2256	Bairiki	KIR	2226
CCK	Cocos (Keeling) Islands	14.00	600	2317	2316	Bantam	CCK	503
CCK	Cocos (Keeling) Islands	14.00	600	2317	2317	West Island	CCK	167
MHL	Marshall Islands	181.00	64000	2507	2507	Dalap-Uliga-Darrit	MHL	28000
NRU	Nauru	21.00	12000	2728	2727	Yangor	NRU	4050
NRU	Nauru	21.00	12000	2728	2728	Yaren	NRU	559
NFK	Norfolk Island	36.00	2000	2806	2806	Kingston	NFK	800
PLW	Palau	459.00	19000	2881	2881	Koror	PLW	12000
MNP	Northern Mariana Islands	464.00	78000	2913	2913	Garapan	MNP	9200
TCA	Turks and Caicos Islands	430.00	17000	3423	3423	Cockburn Town	TCA	4800
TUV	Tuvalu	26.00	12000	3424	3424	Funafuti	TUV	4600
VIR	Virgin Islands, U.S.	347.00	93000	4067	4067	Charlotte Amalie	VIR	13000

Q1.3:
For the same reasons as in Q2 IN-EXISTS and MATERIALIZATION chosen
for each respective subquery.
EXPLAIN
SELECT City.Name, Country.Name
FROM City,Country
WHERE City.Country = Country.Code AND
Country.SurfaceArea < 30000 AND Country.SurfaceArea > 10 AND
((Country.Code, Country.Name) IN
(select Country, Language from CountryLanguage where Percentage > 50) AND
Country.Population > 3000000
OR
(Country.Code, City.Name) IN
(select Country, Language from CountryLanguage));
id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
1	PRIMARY	Country	ALL	PRIMARY,SurfaceArea	NULL	NULL	NULL	239	Using where
1	PRIMARY	City	ref	Country	Country	3	world.Country.Code	18	Using where
3	SUBQUERY	CountryLanguage	index	PRIMARY,Language	PRIMARY	33	NULL	984	Using index
2	DEPENDENT SUBQUERY	CountryLanguage	unique_subquery	PRIMARY,Percentage,Language	PRIMARY	33	func,func	1	Using where
SELECT City.Name, Country.Name
FROM City,Country
WHERE City.Country = Country.Code AND
Country.SurfaceArea < 30000 AND Country.SurfaceArea > 10 AND
((Country.Code, Country.Name) IN
(select Country, Language from CountryLanguage where Percentage > 50) AND
Country.Population > 3000000
OR
(Country.Code, City.Name) IN
(select Country, Language from CountryLanguage));
Name	Name
Kigali	Rwanda

2. NOT IN subqueries


Q2.1:
Number of cities that are not capitals in countries with small population.
MATERIALIZATION is 50 times faster because the cost of each subquery
re-execution is much higher than the cost of index lookups into the
materialized subquery.
EXPLAIN
select count(*) from City
where City.id not in (select capital from Country
where capital is not null and population < 100000);
id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
1	PRIMARY	City	index	NULL	PRIMARY	4	NULL	4080	Using where; Using index
2	SUBQUERY	Country	ALL	NULL	NULL	NULL	NULL	239	Using where

Q2.2e:
Countries that speak French, but do not speak English
IN-EXISTS because the outer query filters many rows, thus
there are few lookups to make.
EXPLAIN
SELECT Country.Name
FROM Country, CountryLanguage 
WHERE Code NOT IN (SELECT Country FROM CountryLanguage WHERE Language = 'English')
AND CountryLanguage.Language = 'French'
  AND Code = Country;
id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
1	PRIMARY	CountryLanguage	ref	PRIMARY,Language	Language	30	const	20	Using index condition
1	PRIMARY	Country	eq_ref	PRIMARY	PRIMARY	3	world.CountryLanguage.Country	1	Using index condition
2	DEPENDENT SUBQUERY	CountryLanguage	unique_subquery	PRIMARY,Language	PRIMARY	33	func,const	1	Using index; Using where
SELECT Country.Name
FROM Country, CountryLanguage 
WHERE Code NOT IN (SELECT Country FROM CountryLanguage WHERE Language = 'English')
AND CountryLanguage.Language = 'French'
  AND Code = Country;
Name
France
Saint Pierre and Miquelon
Belgium
Burundi
Guadeloupe
Haiti
Madagascar
Martinique
Mayotte
French Polynesia
Rwanda
Sao Tome and Principe
Switzerland
New Caledonia
Lebanon
Mauritius
Andorra
Italy
Luxembourg
Q2.2m:
Countries that speak French OR Spanish, but do not speak English
MATERIALIZATION because the outer query filters less rows than Q5-a,
so there are more lookups.
EXPLAIN
SELECT Country.Name
FROM Country, CountryLanguage 
WHERE Code NOT IN (SELECT Country FROM CountryLanguage WHERE Language = 'English')
AND (CountryLanguage.Language = 'French' OR CountryLanguage.Language = 'Spanish')
AND Code = Country;
id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
1	PRIMARY	CountryLanguage	range	PRIMARY,Language	Language	30	NULL	45	Using index condition; Rowid-ordered scan
1	PRIMARY	Country	eq_ref	PRIMARY	PRIMARY	3	world.CountryLanguage.Country	1	Using index condition
2	SUBQUERY	CountryLanguage	ref	PRIMARY,Language	Language	30	const	47	Using index condition
SELECT Country.Name
FROM Country, CountryLanguage 
WHERE Code NOT IN (SELECT Country FROM CountryLanguage WHERE Language = 'English')
AND (CountryLanguage.Language = 'French' OR CountryLanguage.Language = 'Spanish')
AND Code = Country;
Name
Andorra
Argentina
Bolivia
Chile
Costa Rica
Dominican Republic
Ecuador
El Salvador
Spain
Guatemala
Honduras
Colombia
Cuba
Mexico
Nicaragua
Panama
Paraguay
Peru
France
Saint Pierre and Miquelon
Uruguay
Venezuela
Belgium
Burundi
Guadeloupe
Haiti
Madagascar
Martinique
Mayotte
French Polynesia
Rwanda
Sao Tome and Principe
Switzerland
New Caledonia
Lebanon
Mauritius
Andorra
Italy
Luxembourg
France
Sweden

Q2.3e:
Not a very meaningful query that tests NOT IN.
IN-EXISTS because the outer query is cheap enough to reexecute many times.
EXPLAIN
select count(*)
from CountryLanguage
where (Language, Country) NOT IN
(SELECT City.Name, Country.Code
FROM City LEFT JOIN Country ON (Country = Code and City.Population < 10000));
id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
1	PRIMARY	CountryLanguage	index	NULL	PRIMARY	33	NULL	984	Using where; Using index
2	DEPENDENT SUBQUERY	City	ref	CityName	CityName	35	func	2	Using index condition
2	DEPENDENT SUBQUERY	Country	eq_ref	PRIMARY	PRIMARY	3	world.City.Country	1	Using where; Using index
select count(*)
from CountryLanguage
where (Language, Country) NOT IN
(SELECT City.Name, Country.Code
FROM City LEFT JOIN Country ON (Country = Code and City.Population < 10000));
count(*)
979
Q2.3m:
MATERIALIZATION with the PARTIAL_MATCH_MERGE strategy, because the HAVING
clause prevents the use of the index on City(Name), and in practice reduces
radically the size of the temp table.
EXPLAIN
select count(*)
from CountryLanguage
where (Language, Country) NOT IN
(SELECT City.Name, Country.Code
FROM City LEFT JOIN Country ON (Country = Code)
HAVING City.Name LIKE "Santa%");
id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
1	PRIMARY	CountryLanguage	index	NULL	PRIMARY	33	NULL	984	Using where; Using index
2	SUBQUERY	City	ALL	NULL	NULL	NULL	NULL	4080	
2	SUBQUERY	Country	eq_ref	PRIMARY	PRIMARY	3	world.City.Country	1	Using index
select count(*)
from CountryLanguage
where (Language, Country) NOT IN
(SELECT City.Name, Country.Code
FROM City LEFT JOIN Country ON (Country = Code)
HAVING City.Name LIKE "Santa%");
count(*)
984

3. Subqueries with GROUP BY, HAVING, and aggregate functions

Q3.1:
Languages that are spoken in countries with 10 or 11 languages
MATERIALIZATION is about 100 times faster than IN-EXISTS.
EXPLAIN
select count(*)
from CountryLanguage
where
(Country, 10) IN (SELECT Code, COUNT(*) FROM CountryLanguage, Country
WHERE Code = Country GROUP BY Code)
OR
(Country, 11) IN (SELECT Code, COUNT(*) FROM CountryLanguage, Country
WHERE Code = Country GROUP BY Code)
order by Country;
id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
1	PRIMARY	CountryLanguage	index	NULL	PRIMARY	33	NULL	984	Using where; Using index
3	SUBQUERY	CountryLanguage	index	PRIMARY	PRIMARY	33	NULL	984	Using index; Using temporary
3	SUBQUERY	Country	eq_ref	PRIMARY	PRIMARY	3	world.CountryLanguage.Country	1	Using index
2	SUBQUERY	CountryLanguage	index	PRIMARY	PRIMARY	33	NULL	984	Using index; Using temporary
2	SUBQUERY	Country	eq_ref	PRIMARY	PRIMARY	3	world.CountryLanguage.Country	1	Using index
select count(*)
from CountryLanguage
where
(Country, 10) IN (SELECT Code, COUNT(*) FROM CountryLanguage, Country
WHERE Code = Country GROUP BY Code)
OR
(Country, 11) IN (SELECT Code, COUNT(*) FROM CountryLanguage, Country
WHERE Code = Country GROUP BY Code)
order by Country;
count(*)
102

Q3.2:
Countries whose capital is a city name that names more than one
cities.
MATERIALIZATION because the cost of single subquery execution is
close to that of materializing the subquery.
EXPLAIN
select * from Country, City
where capital = id and
(City.name in (SELECT name FROM City
GROUP BY name HAVING Count(*) > 2) OR
capital is null);
id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
1	PRIMARY	Country	ALL	NULL	NULL	NULL	NULL	239	Using where
1	PRIMARY	City	eq_ref	PRIMARY	PRIMARY	4	world.Country.Capital	1	Using where
2	SUBQUERY	City	index	NULL	CityName	35	NULL	4080	Using index
select * from Country, City
where capital = id and
(City.name in (SELECT name FROM City
GROUP BY name HAVING Count(*) > 2) OR
capital is null);
Code	Name	SurfaceArea	Population	Capital	ID	Name	Country	population
BMU	Bermuda	53.00	65000	191	191	Hamilton	BMU	1200
BOL	Bolivia	1098581.00	8329000	194	194	La Paz	BOL	758141
CRI	Costa Rica	51100.00	4023000	584	584	San José	CRI	339131
HKG	Hong Kong	1075.00	6782000	937	937	Victoria	HKG	1312637
SYC	Seychelles	455.00	77000	3206	3206	Victoria	SYC	41000

Q3.3: MATERIALIZATION is 25 times faster than IN-EXISTS
EXPLAIN
SELECT Name
FROM Country
WHERE Country.Code NOT IN
(SELECT Country FROM City GROUP BY Name HAVING COUNT(Name) = 1);
id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
1	PRIMARY	Country	ALL	NULL	NULL	NULL	NULL	239	Using where
2	SUBQUERY	City	ALL	NULL	NULL	NULL	NULL	4080	Using temporary
SELECT Name
FROM Country
WHERE Country.Code NOT IN
(SELECT Country FROM City GROUP BY Name HAVING COUNT(Name) = 1);
Name
Antigua and Barbuda
Costa Rica
Montserrat
Norfolk Island
Seychelles
Antarctica
Bouvet Island
British Indian Ocean Territory
South Georgia and the South Sandwich Islands
Heard Island and McDonald Islands
French Southern territories
United States Minor Outlying Islands

4. Subqueries in the SELECT and HAVING clauses

Q4.1m:
Capital information about very big cities
MATERIALIZATION
EXPLAIN
select Name, City.id in (select capital from Country where capital is not null) as is_capital
from City
where City.population > 10000000;
id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
1	PRIMARY	City	range	Population	Population	5	NULL	4	Using index condition; Rowid-ordered scan
2	SUBQUERY	Country	ALL	NULL	NULL	NULL	NULL	239	Using where
select Name, City.id in (select capital from Country where capital is not null) as is_capital
from City
where City.population > 10000000;
Name	is_capital
Mumbai (Bombay)	0
Q4.1e:
IN-TO-EXISTS after adding an index to make the subquery re-execution
efficient.
create index CountryCapital on Country(capital);
EXPLAIN
select Name, City.id in (select capital from Country where capital is not null) as is_capital
from City
where City.population > 10000000;
id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
1	PRIMARY	City	range	Population	Population	5	NULL	4	Using index condition; Rowid-ordered scan
2	DEPENDENT SUBQUERY	Country	index_subquery	CountryCapital	CountryCapital	5	func	2	Using index; Using where
select Name, City.id in (select capital from Country where capital is not null) as is_capital
from City
where City.population > 10000000;
Name	is_capital
Mumbai (Bombay)	0
drop index CountryCapital on Country;

Q4.2:
MATERIALIZATION
EXPLAIN
SELECT City.Name, City.Population
FROM City JOIN Country ON City.Country = Country.Code
GROUP BY City.Name
HAVING City.Name IN (select Name from Country where population < 1000000);
id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
1	PRIMARY	City	ALL	Country	NULL	NULL	NULL	4080	Using temporary; Using filesort
1	PRIMARY	Country	eq_ref	PRIMARY	PRIMARY	3	world.City.Country	1	Using index
2	SUBQUERY	Country	ALL	Name	NULL	NULL	NULL	239	Using where
SELECT City.Name, City.Population
FROM City JOIN Country ON City.Country = Country.Code
GROUP BY City.Name
HAVING City.Name IN (select Name from Country where population < 1000000);
Name	Population
Djibouti	383000
Gibraltar	27025
Macao	437500
San Marino	2294

5. Subqueries with UNION

Q5.1:
EXPLAIN
SELECT * from City where (Name, 91) in
(SELECT Name, round(Population/1000)
FROM City
WHERE Country = "IND" AND Population > 2500000
UNION
SELECT Name, round(Population/1000)
FROM City
WHERE Country = "IND" AND Population < 100000);
id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
1	PRIMARY	City	ALL	NULL	NULL	NULL	NULL	4080	Using where
2	DEPENDENT SUBQUERY	City	ref	Population,Country,CityName	CityName	35	func	1	Using where
3	DEPENDENT UNION	City	ref	Population,Country,CityName	CityName	35	func	1	Using where
NULL	UNION RESULT	<union2,3>	ALL	NULL	NULL	NULL	NULL	NULL	
SELECT * from City where (Name, 91) in
(SELECT Name, round(Population/1000)
FROM City
WHERE Country = "IND" AND Population > 2500000
UNION
SELECT Name, round(Population/1000)
FROM City
WHERE Country = "IND" AND Population < 100000);
ID	Name	Country	population
1359	Hassan	IND	90803
1360	Ambala Sadar	IND	90712
1361	Baidyabati	IND	90601
set @@optimizer_switch='default';
drop database world;


TEST GROUP 2:
Tests of various combinations of optimizer switches, types of queries,
available indexes, column nullability, constness of tables/predicates.
=====================================================================
set optimizer_switch=@subselect_mat_cost;