【GreatSQL优化器-11】finalize_table_conditions
一、finalize_table_conditions介绍
GreatSQL的优化器在对join做完表排序后,在make_join_query_block
函数对表添加条件,添加完条件在finalize_table_conditions
会对条件再次进行确认,对ref扫描的条件进行删除,对需要cache的条件进行替换,生成的条件就是表执行查询最后用的条件。
下面用一个简单的例子来说明finalize_table_conditions
做什么事情。
```sql
CREATE TABLE t1 (c1 INT PRIMARY KEY, c2 INT,date1 DATETIME);
INSERT INTO t1 VALUES (1,10,'2021-03-25 16:44:00.123456'),(2,1,'2022-03-26 16:44:00.123456'),(3,4,'2023-03-27 16:44:00.123456'),(5,5,'2024-03-25 16:44:00.123456'),(7,null,'2020-03-25 16:44:00.123456'),(8,10,'2020-10-25 16:44:00.123456'),(11,16,'2023-03-25 16:44:00.123456');
CREATE TABLE t2 (cc1 INT PRIMARY KEY, cc2 INT);
INSERT INTO t2 VALUES (1,3),(2,1),(3,2),(4,3),(5,15);
CREATE TABLE t3 (ccc1 INT, ccc2 VARCHAR(100));
INSERT INTO t3 VALUES (1,'aa1'),(2,'bb1'),(3,'cc1'),(4,'dd1'),(null,'ee');
CREATE INDEX idx1 ON t1(c2);
CREATE INDEX idx2 ON t1(c2,date1);
CREATE INDEX idx2_1 ON t2(cc2);
CREATE INDEX idx3_1 ON t3(ccc1);
greatsql > EXPLAIN SELECT * FROM t1 JOIN t2 JOIN t3 ON t1.c1=t2.cc1 AND t1.c1=t3.ccc1 AND t3.ccc1<5;
+----+-------------+-------+------------+--------+---------------+---------+---------+-----------+------+----------+-------------+
| id | select_type | table | partitions | type | possible_keys | key | key_len | ref | rows | filtered | Extra |
+----+-------------+-------+------------+--------+---------------+---------+---------+-----------+------+----------+-------------+
| 1 | SIMPLE | t1 | NULL | range | PRIMARY | PRIMARY | 4 | NULL | 3 | 100.00 | Using where |
| 1 | SIMPLE | t2 | NULL | eq_ref | PRIMARY | PRIMARY | 4 | db1.t1.c1 | 1 | 100.00 | NULL |
| 1 | SIMPLE | t3 | NULL | ref | idx3_1 | idx3_1 | 5 | db1.t1.c1 | 1 | 100.00 | NULL |
+----+-------------+-------+------------+--------+---------------+---------+---------+-----------+------+----------+-------------+
{
"attaching_conditions_to_tables": {
"original_condition": "((`t2`.`cc1` = `t1`.`c1`) and (`t3`.`ccc1` = `t1`.`c1`) and (`t1`.`c1` < 5))",
"attached_conditions_computation": [
],
"attached_conditions_summary": [
{
"table": "`t1`",
"attached": "(`t1`.`c1` < 5)"
},
{
"table": "`t2`",
"attached": "(`t2`.`cc1` = `t1`.`c1`)"
},
{
"table": "`t3`",
"attached": "(`t3`.`ccc1` = `t1`.`c1`)"
}
]
}
},
{
"finalizing_table_conditions": [
{
"table": "`t1`",
"original_table_condition": "(`t1`.`c1` < 5)",
"final_table_condition ": "(`t1`.`c1` < 5)"
},
{
"table": "`t2`",
"original_table_condition": "(`t2`.`cc1` = `t1`.`c1`)", 原始添加的条件
"final_table_condition ": null 经过finalize_table_conditions以后得到的结果,这里条件被删除了
},
{
"table": "`t3`",
"original_table_condition": "(`t3`.`ccc1` = `t1`.`c1`)", 原始添加的条件
"final_table_condition ": null 经过finalize_table_conditions以后得到的结果,这里条件被删除了
}
]
},
```
二、finalize_table_conditions代码解释
finalize_table_conditions
的操作在优化器的中后阶段,用来对之前生成的每张表的条件进行替换或者删除。
```sql
bool JOIN::optimize(bool finalize_access_paths) {
if (finalize_table_conditions(thd)) return true;
}
bool JOIN::finalize_table_conditions(THD *thd) {
// 遍历之前已经排序好的表,找到每张表的条件,然后进行裁剪
for (uint i = const_tables; i < tables; i++) {
Item *condition = best_ref[i]->condition();
if (condition == nullptr) continue;
// 这里进行条件删减,操作见下面表一
reduce_cond_for_table();
if (condition != nullptr) {
condition = condition->compile(
// 这个函数确认cond条件是否需要cache,true的话给carg->cache_item赋值,以便下面函数生成对应的Item_cache
// 如果条件属性是INNER_TABLE_BIT并且不满足表二的话需要创建对应的Item_cache
&Item::cache_const_expr_analyzer, (uchar **)&analyzer_arg,
// 这个函数对于需要Item_cache的Item生成对应的Item_cache
&Item::cache_const_expr_transformer, (uchar *)&cache_arg);
trace_cond.add("final_table_condition ", condition);
}
}
}
```
表一:reduce_cond_for_table操作
Item类型 | 操作 | 结果 |
---|---|---|
Item_func::COND_AND_FUNC | 递归reduce_cond_for_table() | 为空的话删除,与cond不同的话替换 |
Item_func::COND_OR_FUNC | 递归reduce_cond_for_table() | 与cond不同的话替换 |
Item_func::TRIG_COND_FUNC | 递归reduce_cond_for_table() | 与cond不同的话替换 |
Item_func::EQ_FUNC | 通过test_if_ref()判断该条件是否使用ref方式扫描※重要 | true返回空,false返回原始cond |
其他类型 | 不操作 | 直接返回原始cond |
表二:不能生成Item cache的Item
序号 | Item类型 |
---|---|
1 | 常数类型 |
2 | 表的列 |
3 | 子查询 |
4 | ROW对象 |
5 | prepare的参数 |
6 | 已经被cache了 |
三、实际例子说明
接下来看几个例子来说明上面的代码:
```sql
greatsql > EXPLAIN SELECT * FROM t1 JOIN t2 JOIN t3 ON t1.c1=t2.cc1 AND t1.c1=t3.ccc1 AND t3.ccc1<5;
{
"plan_prefix": [
],
"table": "`t1`",
"best_access_path": {
"considered_access_paths": [
{
"access_type": "ref",
"index": "PRIMARY",
"usable": false,
"chosen": false
},
{
"rows_to_scan": 7,
"filtering_effect": [
],
"final_filtering_effect": 0.428571,
"access_type": "scan",
"resulting_rows": 3,
"cost": 1.7,
"chosen": true
}
]
},
"condition_filtering_pct": 100,
"rows_for_plan": 3,
"cost_for_plan": 1.7,
"rest_of_plan": [
{
"plan_prefix": [
"`t1`"
],
"table": "`t2`",
"best_access_path": {
"considered_access_paths": [
{
"access_type": "eq_ref", 确定t2使用了ref方式扫描并且用到了主键索引
"index": "PRIMARY",
"rows": 1,
"cost": 3.3,
"chosen": true,
"cause": "clustered_pk_chosen_by_heuristics"
},
{
"access_type": "range",
"range_details": {
"used_index": "PRIMARY"
},
"chosen": false,
"cause": "heuristic_index_cheaper"
}
]
},
"condition_filtering_pct": 100,
"rows_for_plan": 3,
"cost_for_plan": 5,
"rest_of_plan": [
{
"plan_prefix": [
"`t1`",
"`t2`"
],
"table": "`t3`",
"best_access_path": {
"considered_access_paths": [
{
"access_type": "ref", 确定t3使用了ref方式扫描并且用到了idx3_1索引
"index": "idx3_1",
"rows": 1,
"cost": 1.05,
"chosen": true
},
{
"access_type": "range",
"range_details": {
"used_index": "idx3_1"
},
"cost": 2.06,
"rows": 4,
"chosen": false,
"cause": "cost"
}
]
},
"added_to_eq_ref_extension": false
},
{
"finalizing_table_conditions": [
{
"table": "`t1`",
"original_table_condition": "(`t1`.`c1` < 5)",
"final_table_condition ": "(`t1`.`c1` < 5)"
},
{
"table": "`t2`",
"original_table_condition": "(`t2`.`cc1` = `t1`.`c1`)", 这里发现t2.cc1等号条件用到了ref方式扫描,因此被裁剪了
"final_table_condition ": null 条件被删除
},
{
"table": "`t3`",
"original_table_condition": "(`t3`.`ccc1` = `t1`.`c1`)", 这里发现t3.ccc1等号条件用到了ref方式扫描,因此被裁剪了
"final_table_condition ": null 条件被删除
}
]
},
{
"refine_plan": [
{
"table": "`t1`"
},
{
"table": "`t2`"
},
{
"table": "`t3`"
}
]
```
下面加一个带有INNER_TABLE_BIT属性的Item条件,看看条件转换后的结果。
```sql
greatsql> SELECT * FROM t1 JOIN t2 JOIN t3 ON t1.c1=t2.cc1 AND t1.c1=t3.ccc1 WHERE t1.c2<@@optimizer_search_depth;
{
"attaching_conditions_to_tables": {
"original_condition": "((`t2`.`cc1` = `t1`.`c1`) and (`t3`.`ccc1` = `t1`.`c1`) and (`t1`.`c2` < ))",
"attached_conditions_computation": [
],
"attached_conditions_summary": [
{
"table": "`t1`",
"attached": "(`t1`.`c2` < )"
},
{
"table": "`t2`",
"attached": "(`t2`.`cc1` = `t1`.`c1`)"
},
{
"table": "`t3`",
"attached": "(`t3`.`ccc1` = `t1`.`c1`)"
}
]
}
},
{
"finalizing_table_conditions": [
{
"table": "`t1`",
"original_table_condition": "(`t1`.`c2` < )",
"final_table_condition ": "(`t1`.`c2` < ())" 这里看到条件里面的系统变量被转变为cache了
},
{
"table": "`t2`",
"original_table_condition": "(`t2`.`cc1` = `t1`.`c1`)",
"final_table_condition ": null
},
{
"table": "`t3`",
"original_table_condition": "(`t3`.`ccc1` = `t1`.`c1`)",
"final_table_condition ": null
}
]
},
```
以下例子也会把条件转换为cache,因为f1(1)是INNER_TABLE_BIT属性,如果改为f1(t1.c2)就不能转为cache了,因为f1(t1.c2)是NO DETERMINISTIC不确定的,非INNER_TABLE_BIT属性。
```sql
SET GLOBAL log_bin_trust_function_creators=1;
SET sql_mode=ORACLE;
DELIMITER $$
CREATE OR REPLACE FUNCTION f1 (id int) RETURN INT DETERMINISTIC IS
BEGIN
RETURN id;
END;
$$
DELIMITER ;
greatsql> SELECT * FROM t1 JOIN t2 JOIN t3 ON t1.c1=t2.cc1 AND t1.c1=t3.ccc1 WHERE t1.c2
四、总结
从上面优化器的步骤我们认识了finalize_table_conditions
函数的使用方法,也知道了什么时候表的条件需要进行删除或者转换,最后学会了Item cache的生成条件。到这里一个优化器的工作快要结束了,最后还有一个临时表需要创建,这个下一期讲。
Enjoy GreatSQL 😃
关于 GreatSQL
GreatSQL是适用于金融级应用的国内自主开源数据库,具备高性能、高可靠、高易用性、高安全等多个核心特性,可以作为MySQL或Percona Server的可选替换,用于线上生产环境,且完全免费并兼容MySQL或Percona Server。
相关链接: GreatSQL社区 Gitee GitHub Bilibili
GreatSQL社区:
社区博客有奖征稿详情:https://greatsql.cn/thread-100-1-1.html
技术交流群:
微信:扫码添加
GreatSQL社区助手
微信好友,发送验证信息加群
。