1CHARSET=utf8mb4 COLLATE=utf8mb4_general_ci
1id int unsigned not null auto_increment comment 'ID主键’,
2primary key(id),
1deleted tinyint not null DEFAULT 0 COMMENT '是否删除 0 未删除 1 删除 默认是0’,
2create_time timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP COMMENT '记录创建时间,默认当前时间’,
3update_time timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '记录更新时间,默认当前时间’,
4index idx_create_time(create_time),
5index idx_update_time(update_time),
备注:基于以上行长度和页长度的定义,也许有小伙伴会有疑问,如果表真的存放了 64KB 数据,那岂不是一行数据会占用四个数据页,一个数据页中只能存放一行数据(或一行数据的一部分)。那么此时 MySQL 的 B+ 树的数据存储结构就退变为线性了,这是 MySQL 设计中绝对不允许的!!! 按照 MySQL 的 B+ 树设计,一个数据页中至少要存放两行数据,否则 B+ 树会从树状结构退化为线性结构。 因此当行存储的实际数据过大,在页中存放不下时,MySQL 在存储这行记录的时候,会将较大的数据列的数据存放在外部存储页中,数据页只保存指向外部存储页的指针。此时如果存储读取这个大的数据列,会额外消耗更多的 IO
5.1. 字段过长会导致索引长度过长,超过一定长度只能创建前缀索引,而前缀索引不能走到索引覆盖
5.2. 数据读取到内存中是按照定义长度存放,过长的定义会占用更多内存空间
3.1 tinyint 1 字节;smallint 2 字节;int 4 字节;bigint 8 字节;
3.2 date 3 字节;datetime 8 字节;timestamp 4 字节;
3.3 字符类型括号中定义的数字为字符数,即 varchar(32) 可以存放 32 个数字或者汉字
3.4 字符类型长度跟字符集有关(其中 utf8 占用 3 字节,utf8mb4 占用 4 字节)
varchar(10) 类型(utf8mb4)字节数: 10*4B+1B=41B
varchar(100) 类型(utf8mb4)字节数:100*4B+2B=402B
char(10) 类型(utf8mb4)字节数:10*4B=40B
索引的长度限制及计算方式如下: 1.索引最大长度为 767 字节,若索引长度超过 767 字节将无法创建(可考虑创建前缀索引) 2.索引长度与字段定义长度基本相同,前缀索引长度与定义的前缀长度有关 3.变长类型如 varchar,额外需要 2 个字节存放索引长度 4.如果字段可以为空,额外需要 1 个字节存放为空标识 5.索引长度 = 字段长度 + 是否为空(+1) + 是否变长(+2)
1mysql> explain select * from t1 where name like '张三%';
2+----+-------------+-------+-------+---------------+----------+---------+------+------+-----------------------+
3| id | select_type | table | type | possible_keys | key | key_len | ref | rows | Extra |
4+----+-------------+-------+-------+---------------+----------+---------+------+------+-----------------------+
5| 1 | SIMPLE | t1 | range | uni_name | uni_name | 43 | NULL | 1 | Using index condition |
6+----+-------------+-------+-------+---------------+----------+---------+------+------+-----------------------+
71 row in set (0.05 sec)
8mysql> show create table t1;
9| Table | Create Table
10| t1 | CREATE TABLE `t1` (
11 `id` int(11) DEFAULT NULL,
12 `name` varchar(10) DEFAULT NULL,
13 `age` int(11) DEFAULT NULL,
14 `cnts` int(11) NOT NULL DEFAULT '1',
15 `a1` varchar(10) DEFAULT NULL,
16 `a2` int(11) DEFAULT NULL,
17 `a3` varchar(32) DEFAULT NULL,
18 `a4` varchar(32) NOT NULL DEFAULT '',
19 `a5` int(11) DEFAULT '1',
20 UNIQUE KEY `uni_name` (`name`),
21 KEY `idx_a2` (`a2`)
22) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 |
1.寻道时间指的是磁臂移动到指定磁道所需要的时间,主流磁盘一般在 5ms 以下; 2.旋转延迟就是我们经常听说的磁盘转速,比如一个磁盘 7200 转,表示每分钟能转 7200 次,也就是说 1 秒钟能转 120 次,旋转延迟就是 1/120/2 = 4.17ms; 3.传输时间指的是从磁盘读出或将数据写入磁盘的时间,一般在零点几毫秒,相对于前两个时间可以忽略不计。
备注:索引树的高度跟磁盘 IO 次数呈正相关,可简单理解为索引树高度即是磁盘 IO 次数。
备注:B+ 树是在 B 树上优化衍生而来,本章节我们主要谈谈 B+ 树与 B 树的区别及优势
1.B 树的非叶子节点会存放数据,导致一个页存放的索引数较少,索引树较高
B+ 树的非叶子节点不会存放数据,只存放键值,一个页可以存放更多索引,索引树较矮
2.B 树的查找可能会在非叶子节点命中,查找不稳定
B+ 树的查找必须到叶子节点才会命中,查找十分稳定
3.B 树的范围遍历效率非常低
B+ 树的叶子节点中存放有双向指针构成一种链表结构,范围查询效率非常高效。而数据库的范围查询十分常见
备注:在 MySQL 的 B+ 树结构中,聚簇索引和非聚簇索引存储稍有差异 聚簇索引的 data 部分,存储的是具体的行数据 非聚簇索引的 data 部分,存储的是主键 ID 信息
假设:表平均行长度为 300B,主键索引列 ID 为 int 类型 普通索引列 name 为 varchar(20) 类型非空 (utf8mb4) 普通索引列 info 为 varchar(150) 类型可以为空 (utf8mb4) 分别了解主键索引和普通索引在不同类型下的最大能存储条目数 主键索引的叶子节点中存放具体行数据;普通索引的叶子节点中存放主键信息 备注:索引存储中除索引信息外,还会用 4B 存储页号,6B 存储其他数据
1每个非叶子节点可存放 key 个数为 M1=16KB/(4B+4B+6B)≈1170
2每个叶子节点可存放 key 个数为 M2=16KB/(300B+4B+6B) ≈52
3则 3 层索引最终能存放最大条目数为:L=1170*1170*52 ≈7118W
4 4 层索引最终能存放最大条目数为:L=1170*1170*1170*52 ≈832亿
1每个非叶子节点可存放 key 个数为 M1=16KB/(20*4B+2B+4B+6B)≈178
2每个叶子节点可存放 key 个数为 M2=16KB/(4B+20*4B+2B+4B+6B) ≈170
3则 3 层索引最终能存放最大条目数为:L= 178*178*170 ≈538W
4 4 层索引最终能存放最大条目数为:L= 178*178*178*170 ≈9.58亿
1每个非叶子节点可存放 key 个数为 M1=16KB/(150*4B+2B+1B+4B+6B)≈26
2每个叶子节点可存放 key 个数为 M2=16KB/(150*4B+2B+1B+4B+6B+4B) ≈26
3则 3 层索引最终能存放最大条目数为:L= 26*26*26 ≈17576
4 4 层索引最终能存放最大条目数为:L= 26*26*26*26 ≈456976
5 5 层索引最终能存放最大条目数为:L= 26*26*26*26*26 ≈1188W
备注:前缀索引无法走索引覆盖
1如 lessons 表的 les_uid 字段,定义为
2`les_uid` varchar(255) COLLATE utf8mb4_unicode_ci DEFAULT NULL,
3但是其实该字段存放的时 uuid 信息,长度为固定的 32 位,该字段如需创建索引,则可考虑前缀索引,指定前缀长度为 32
4index idx_les_uid(les_uid(32)),
备注:组合索引的列数不宜过多,一般 2-3 列即可。列过多很难用到后面的列,且会增加索引长度。
1如经常查询需要查询学生在某一个时间范围的上课信息,可创建基于 stu_id 和 les_start_time 的组合索引
2index idx_stu_id_start_time(stu_id,les_start_time)
1mysql> explain select count(0) from students where stu_city='上海市';
2+----+-------------+----------+------+---------------+--------------+---------+-------+------+--------------------------+
3| id | select_type | table | type | possible_keys | key | key_len | ref | rows | Extra |
4+----+-------------+----------+------+---------------+--------------+---------+-------+------+--------------------------+
5| 1 | SIMPLE | students | ref | idx_stu_city | idx_stu_city | 767 | const | 2664 | Using where; Using index |
6+----+-------------+----------+------+---------------+--------------+---------+-------+------+--------------------------+
71 row in set (0.06 sec)
8该查询统计行数信息,而 stu_city 列有索引,查询只需要使用 idx_stu_city 扫描即可取得 count 结果,无需回表取其他列数据。
9extra 列的 using index 表明查询走索引覆盖
备注:如果单列索引创建的不合理,比如在 bu、state、status 等列上创建单列索引,当 index_merge 使用到这些列做索引扫描合并,那么查询效率会非常低,需要关注!!!产生这种问题的主要原因是在选择性低的列上创建了索引,选择性低的列不适合创建索引,可能会降低查询效率!!!
1index_merge之using union:
2mysql> explain
3select count(0)
4from lessons
5where stu_id=116401
6or sel_id=1113;
7+----+-------------+---------+-------------+----------------------------------------------------------------------------------------+-----------------------------------+---------+------+------+-------------------------------------------------------------+
8| id | select_type | table | type | possible_keys | key | key_len | ref | rows | Extra |
9+----+-------------+---------+-------------+----------------------------------------------------------------------------------------+-----------------------------------+---------+------+------+-------------------------------------------------------------+
10| 1 | SIMPLE | lessons | index_merge | lessons_stu_id,idx_stuid_lessub,IDX_LES_TYPE_STU_ID,stu_id_pay_type,idx_lessons_sel_id | lessons_stu_id,idx_lessons_sel_id | 4,5 | NULL | 1467 | Using union(lessons_stu_id,idx_lessons_sel_id); Using where |
11+----+-------------+---------+-------------+----------------------------------------------------------------------------------------+-----------------------------------+---------+------+------+-------------------------------------------------------------+
121 row in set (0.09 sec)
13查询使用 or 连接多个条件,查询分别使用 stu_id 和 sel_id 列索引进行扫描,并将扫描结果进行取并集(union)
14
15index_merge之using intersect:
16mysql> explain
17select count(0)
18from lessons
19where stu_id=116401
20and sel_id=1113;
21+----+-------------+---------+-------------+----------------------------------------------------------------------------------------+-----------------------------------+---------+------+------+------------------------------------------------------------------------------+
22| id | select_type | table | type | possible_keys | key | key_len | ref | rows | Extra |
23+----+-------------+---------+-------------+----------------------------------------------------------------------------------------+-----------------------------------+---------+------+------+------------------------------------------------------------------------------+
24| 1 | SIMPLE | lessons | index_merge | lessons_stu_id,idx_stuid_lessub,IDX_LES_TYPE_STU_ID,stu_id_pay_type,idx_lessons_sel_id | lessons_stu_id,idx_lessons_sel_id | 4,5 | NULL | 1 | Using intersect(lessons_stu_id,idx_lessons_sel_id); Using where; Using index |
25+----+-------------+---------+-------------+----------------------------------------------------------------------------------------+-----------------------------------+---------+------+------+------------------------------------------------------------------------------+
261 row in set (0.07 sec)
27查询使用 and 连接多个条件,查询分别使用 stu_id 和 sel_id 列索引进行扫描,并将扫描结果进行取交集(intersect)
Id:id 列数字越大越先执行,数字一样则从上而下执行
Type:依次从好到差:system,const,eq_ref,ref,fulltext,ref_or_null,index_merge,unique_subquery,index_subquery,range,index,ALL,除了 all 之外,其他的 type 都可以使用到索引,如果 type 为 all 就需要关注并优化 SQL 了。
Key:查询真正使用到的索引,select_type 为 index_merge 时,这里可能出现两个以上的索引,其他的 select_type 这里只会出现一个
Key_len:处理查询的索引长度,前文中已经介绍过 key_len 的计算规则。ICP 特性使用的索引不会计入
Rows:这里是执行计划中估算的扫描行数,不是精确值
Extra:如果你想要优化你的查询,那就要注意 extra 辅助信息中的 using filesort 和 using temporary ,这两项非常消耗性能,需要注意。
1for each row in t1 matching range {
2 for each row in t2 matching reference key {
3 for each row in t3 {
4 if row satisfies join conditions, send to client
5 }
6 }
7}
1for each row in t1 matching range {
2 for each row in t2 matching reference key {
3 store used columns from t1, t2 in join buffer
4 if buffer is full {
5 for each row in t3 {
6 for each t1, t2 combination in join buffer {
7 if row satisfies join conditions, send to client
8 }
9 }
10 empty join buffer
11 }
12 }
13 }
14if buffer is not empty {
15 for each row in t3 {
16 for each t1, t2 combination in join buffer {
17 if row satisfies join conditions, send to client
18 }
19 }
20}
MySQL 优化器会自动选择小表作为驱动表,以减少循环的次数。但并不会只以表大小作为唯一选择依据
最好不要干涉数据库的驱动表选择,让 MySQL 优化器自动选择最合适的表作为驱动表
查询会首先通过查询条件,过滤驱动表上的数据记录,筛选满足要求的结果放入缓存中。驱动表上需尽量通过索引扫描降低数据量。
被驱动表的关联字段上,需要有索引,否则每一次关联,被驱动表都要全表扫描,效率非常低。
被驱动表的字段类型、字符集、排序方式需和驱动表保持一致,否则无法直接关联
使用外关联时,只有基准表才能被选择为驱动表
如 A LEFT JOIN B ON A.KEY=B.KEY ,则只有 A 表才能被选择为驱动表,B 表不能作为驱动表
尽量避免全表扫描,应考虑在 where 和 order by 涉及的列上建立索引
被驱动表的关联字段需要创建索引,否则被驱动表会走全表扫描
索引遵循最左前缀匹配原则,like 写法只能将 % 放在右边,如 name like ‘掌门学员%’;若 % 放在左边会导致索引失效
应尽量避免在 where 子句中使用 != , <> ,not in 操作符,会导致索引失效
应尽量避免在 where 子句中使用 or 来连接条件,可尝试拆分为 union/union all
可考虑使用 join/left join 关联查询,替代子查询、in/not in 的写法,尽量不要用子查询
如果 in 的内容是连续的,可使用 between…and 或者 >….< 替代,改走范围扫描
不要在查询字段上使用函数或者表达式,会导致索引失效,可在参数字段上做函数或表达式运算
查询时需根据字段定义类型进行传参 ,若参数类型与字段定义类型不一致,会导致索引失效
不要使用 select * 写法,只查询需要的列
不要在数据库中使用变量 + 分组排序方式构造排序字段,MySQL 需要基于全量数据做排序分组,效率很低
通过 limit 方式分页会导致后续分页越来越慢,可取前一次分页的最大 ID 作为下一页参数输入,进行分页
不要通过 order by rand() 方式取随机数,效率极低。如果需要取随机数,可以先用随机数方法取得一个整数,然后根据 id>= 该整数即可。
禁止不必要的排序,排序操作极耗资源,不要轻易分组排序
不要在程序中使用 using index 强制索引写法
1mysql> explain
2select count(0)
3from students
4where students.created_at>='2021-05-01'
5 or students.referrer_user_id>0;
6+----+-------------+----------+------+----------------------------+------+---------+------+----------+-------------+
7| id | select_type | table | type | possible_keys | key | key_len | ref | rows | Extra |
8+----+-------------+----------+------+----------------------------+------+---------+------+----------+-------------+
9| 1 | SIMPLE | students | ALL | created_at,idx_ref_user_id | NULL | NULL | NULL | 52833786 | Using where |
10+----+-------------+----------+------+----------------------------+------+---------+------+----------+-------------+
111 row in set (0.06 sec)
12说明:students 表的 created_at 和 referrer_user_id 列都有索引,但是由于查询使用 or 连接,导致无法走索引扫描
13type 为 all 说明查询走全表扫描,该查询 10min 仍然无法查询出结果
1mysql> explain
2select count(0)
3from
4(select id
5from students
6where students.created_at>='2021-05-01'
7union
8select id
9from students
10where students.referrer_user_id>0) as t;
11+------+--------------+------------+-------+-----------------+-----------------+---------+------+---------+--------------------------+
12| id | select_type | table | type | possible_keys | key | key_len | ref | rows | Extra |
13+------+--------------+------------+-------+-----------------+-----------------+---------+------+---------+--------------------------+
14| 1 | PRIMARY | <derived2> | ALL | NULL | NULL | NULL | NULL | 7638172 | NULL |
15| 2 | DERIVED | students | range | created_at | created_at | 6 | NULL | 2085176 | Using where; Using index |
16| 3 | UNION | students | range | idx_ref_user_id | idx_ref_user_id | 4 | NULL | 5552996 | Using where; Using index |
17| NULL | UNION RESULT | <union2,3> | ALL | NULL | NULL | NULL | NULL | NULL | Using temporary |
18+------+--------------+------------+-------+-----------------+-----------------+---------+------+---------+--------------------------+
194 rows in set (0.24 sec)
20说明:改用 unio 后两个子查询中都能走各自对应的索引,并且因为是查询 ID 信息,查询走覆盖索引 using index,效率很高,该查询最终耗时 11.64s
1mysql> explain
2select sum(t.money)
3from students
4join
5(select payments.stu_id,payments.money
6from payments
7where payments.is_paid=1
8 and payments.is_canceled=0
9 and payments.money>0) as t on students.id=t.stu_id
10 where students.created_at>='2021-05-01'
11 and students.created_at< '2021-06-01';
12+----+-------------+------------+--------+-----------------------------------+---------+---------+----------+---------+-------------+
13| id | select_type | table | type | possible_keys | key | key_len | ref | rows | Extra |
14+----+-------------+------------+--------+-----------------------------------+---------+---------+----------+---------+-------------+
15| 1 | PRIMARY | <derived2> | ALL | NULL | NULL | NULL | NULL | 2805183 | NULL |
16| 1 | PRIMARY | students | eq_ref | PRIMARY,created_at | PRIMARY | 4 | t.stu_id | 1 | Using where |
17| 2 | DERIVED | payments | ALL | IDX_MONEY_STU_ID,IDX_MONEY_SEL_ID | NULL | NULL | NULL | 5610366 | Using where |
18+----+-------------+------------+--------+-----------------------------------+---------+---------+----------+---------+-------------+
193 rows in set (0.09 sec)
20说明:payments 表使用单独的子查询,type 为 ALL 需要扫描整个 payments 表记录,将返回的结果存放在临时表,然后与 students 表进行匹配。查询耗时 22s
1mysql> explain
2select sum(payments.money)
3from students
4join payments on students.id=payments.stu_id
5where payments.is_paid=1
6 and payments.is_canceled=0
7 and payments.money>0
8 and students.created_at>='2021-05-01'
9 and students.created_at< '2021-06-01';
10+----+-------------+----------+-------+---------------------------------------------------+-----------------+---------+-------------------+---------+------------------------------------+
11| id | select_type | table | type | possible_keys | key | key_len | ref | rows | Extra |
12+----+-------------+----------+-------+---------------------------------------------------+-----------------+---------+-------------------+---------+------------------------------------+
13| 1 | SIMPLE | students | range | PRIMARY,created_at | created_at | 6 | NULL | 1983818 | Using where; Using index |
14| 1 | SIMPLE | payments | ref | payments_stu_id,IDX_MONEY_STU_ID,IDX_MONEY_SEL_ID | payments_stu_id | 4 | forge.students.id | 1 | Using index condition; Using where |
15+----+-------------+----------+-------+---------------------------------------------------+-----------------+---------+-------------------+---------+------------------------------------+
162 rows in set (0.26 sec)
17说明:不使用子查询,改为 join 写法后。查询首先根据 students.created_at 走索引扫描,然后根据查询的结果与 payments 表的 stu_id 进行关联。查询耗时 1.85s
1mysql> explain
2select u.id
3from users u
4where u.updated_at > '2021-05-20 12:00:00'
5and u.id not in(select a.user_id from users_account_number a);
6+----+--------------------+-------+-----------------+----------------+----------------+---------+------+--------+--------------------------+
7| id | select_type | table | type | possible_keys | key | key_len | ref | rows | Extra |
8+----+--------------------+-------+-----------------+----------------+----------------+---------+------+--------+--------------------------+
9| 1 | PRIMARY | u | range | idx_uptime | idx_uptime | 6 | NULL | 103664 | Using where; Using index |
10| 2 | DEPENDENT SUBQUERY | a | unique_subquery | user_id_unique | user_id_unique | 4 | func | 1 | Using index |
11+----+--------------------+-------+-----------------+----------------+----------------+---------+------+--------+--------------------------+
122 rows in set (0.06 sec)
13说明:查询是统计 users 表创建时间大于 '2021-05-20 12:00:00',且不在 users_account_number 表中的用户 ID
14请注意 id=2 的 select_typ 类型为 DEPENDENT SUBQUERY,表示外层 select 结果需要依赖于子查询的结果。效率会非常差
1mysql> explain
2select u.id
3from users u
4left join users_account_number a on u.id=a.user_id
5where u.updated_at > '2021-05-20 12:00:00'
6and a.user_id is null;
7+----+-------------+-------+--------+----------------+----------------+---------+--------------+--------+--------------------------------------+
8| id | select_type | table | type | possible_keys | key | key_len | ref | rows | Extra |
9+----+-------------+-------+--------+----------------+----------------+---------+--------------+--------+--------------------------------------+
10| 1 | SIMPLE | u | range | idx_uptime | idx_uptime | 6 | NULL | 105958 | Using where; Using index |
11| 1 | SIMPLE | a | eq_ref | user_id_unique | user_id_unique | 4 | zm-user.u.id | 1 | Using where; Not exists; Using index |
12+----+-------------+-------+--------+----------------+----------------+---------+--------------+--------+--------------------------------------+
132 rows in set (0.07 sec)
14说明:查询 select_type 都变为 simple ,并且查询先基于 updated_at 做索引过滤,然后与 users_account_number 进行匹配,效率非常高
1mysql> explain
2 -> select *
3from users
4where mobile=13999999999;
5+----+-------------+-------+------+---------------------+------+---------+------+----------+-------------+
6| id | select_type | table | type | possible_keys | key | key_len | ref | rows | Extra |
7+----+-------------+-------+------+---------------------+------+---------+------+----------+-------------+
8| 1 | SIMPLE | users | ALL | users_mobile_unique | NULL | NULL | NULL | 83319185 | Using where |
9+----+-------------+-------+------+---------------------+------+---------+------+----------+-------------+
101 row in set (0.08 sec)
11说明:users 表的 mobile 字段是 varchar 类型,并且创建有索引,但是输入参数是整形,导致查询无法走索引扫描,type 为 ALL 全表扫描。
1mysql> explain
2 -> select *
3from users
4where mobile='13999999999';
5+----+-------------+-------+-------+---------------------+---------------------+---------+-------+------+-------+
6| id | select_type | table | type | possible_keys | key | key_len | ref | rows | Extra |
7+----+-------------+-------+-------+---------------------+---------------------+---------+-------+------+-------+
8| 1 | SIMPLE | users | const | users_mobile_unique | users_mobile_unique | 62 | const | 1 | NULL |
9+----+-------------+-------+-------+---------------------+---------------------+---------+-------+------+-------+
101 row in set (0.07 sec)
11说明:mobile 字段为 varchar 类型,因此在参数上使用‘’标识为字符类型,查询走到 mobile 列的唯一索引,效率非常高。
1mysql> explain
2select count(0)
3from students
4join students_seller on students.id=students_seller.student_id
5where date(students.created_at)='2021-05-05'
6 and students_seller.state=0;
7+----+-------------+-----------------+-------+------------------------------+------------+---------+-------------------+----------+------------------------------------+
8| id | select_type | table | type | possible_keys | key | key_len | ref | rows | Extra |
9+----+-------------+-----------------+-------+------------------------------+------------+---------+-------------------+----------+------------------------------------+
10| 1 | SIMPLE | students | index | PRIMARY | created_at | 6 | NULL | 52853797 | Using where; Using index |
11| 1 | SIMPLE | students_seller | ref | student_id,idx_stu_sel_state | student_id | 5 | forge.students.id | 1 | Using index condition; Using where |
12+----+-------------+-----------------+-------+------------------------------+------------+---------+-------------------+----------+------------------------------------+
132 rows in set (0.09 sec)
14说明:由于在 students.created_at 字段上使用 date 函数,导致无法通过 created_at 列匹配满足时间要求的数据,通过 rows 列可以看到是全表扫描 5285w 数据量。
15或许有人会疑问既然是全表扫描,为什么 type 是 index 而不是 ALL。这是因为 students 表只用到 created_at 和 id 列,这两列是直接包含在索引中的,查询是走的覆盖索引。
1mysql> explain
2select count(0)
3from students
4join students_seller on students.id=students_seller.student_id
5where students.created_at>='2021-05-05 00:00:00'
6 and students.created_at<='2021-05-05 23:59:59'
7 and students_seller.state=0;
8+----+-------------+-----------------+-------+------------------------------+------------+---------+-------------------+--------+------------------------------------+
9| id | select_type | table | type | possible_keys | key | key_len | ref | rows | Extra |
10+----+-------------+-----------------+-------+------------------------------+------------+---------+-------------------+--------+------------------------------------+
11| 1 | SIMPLE | students | range | PRIMARY,created_at | created_at | 6 | NULL | 134092 | Using where; Using index |
12| 1 | SIMPLE | students_seller | ref | student_id,idx_stu_sel_state | student_id | 5 | forge.students.id | 1 | Using index condition; Using where |
13+----+-------------+-----------------+-------+------------------------------+------------+---------+-------------------+--------+------------------------------------+
142 rows in set (0.09 sec)
15说明:由于是查询注册时间为 2021-05-05 日的学生,那么可以替代为使用 created_at>= ... and <= 的写法
16优化后查询根据 students.created_at 走索引范围查询,匹配行数 rows 为 134092 条,然后与 students_seller 表关联,效率非常高。
1mysql> explain
2select students.id,students.user_id,students_seller.seller_id,students.stu_city
3from students
4join students_seller on students.id=students_seller.student_id
5where students.created_at>='2021-01-01'
6 and students_seller.state=0
7 limit 100;
8 -- limit 1000000,100;
9+----+-------------+-----------------+-------+------------------------------+------------+---------+-------------------+----------+------------------------------------+
10| id | select_type | table | type | possible_keys | key | key_len | ref | rows | Extra |
11+----+-------------+-----------------+-------+------------------------------+------------+---------+-------------------+----------+------------------------------------+
12| 1 | SIMPLE | students | range | PRIMARY,created_at | created_at | 6 | NULL | 23541528 | Using index condition |
13| 1 | SIMPLE | students_seller | ref | student_id,idx_stu_sel_state | student_id | 5 | forge.students.id | 1 | Using index condition; Using where |
14+----+-------------+-----------------+-------+------------------------------+------------+---------+-------------------+----------+------------------------------------+
152 rows in set (0.43 sec)
16说明:虽然 limit 100 和limit 1000000,100 的解释计划相同,但是执行时间差异非常大。
17limit m,n 中的 m 数越大,则查询越慢。limit 100 的执行时间 0.05s;limit 1000000,100 的执行时间 35s
1mysql> explain
2select students.id,students.user_id,students_seller.seller_id,students.stu_city
3from students
4join students_seller on students.id=students_seller.student_id
5where students.created_at>='2021-01-01'
6 and students_seller.state=0
7 and students.id>54978976 -- 取上一次分页的最大ID
8limit 100;
9+----+-------------+-----------------+-------+------------------------------+------------+---------+-------------------+----------+------------------------------------+
10| id | select_type | table | type | possible_keys | key | key_len | ref | rows | Extra |
11+----+-------------+-----------------+-------+------------------------------+------------+---------+-------------------+----------+------------------------------------+
12| 1 | SIMPLE | students | range | PRIMARY,created_at | PRIMARY | 4 | NULL | 26431416 | Using where |
13| 1 | SIMPLE | students_seller | ref | student_id,idx_stu_sel_state | student_id | 5 | forge.students.id | 1 | Using index condition; Using where |
14+----+-------------+-----------------+-------+------------------------------+------------+---------+-------------------+----------+------------------------------------+
152 rows in set (0.09 sec)
16说明:例如假设第 10000 页取到的 100 行数据中最大的 students.ID 为 54978976,现在需要取得第 10001 页的 100 行数据
17那么可以添加 students.id>54978976,然后取 100 行数据,此方法取得的即为第 10001 页的数据。
18 通过 id 字段的范围限制,比简单的 limit m,n 更加高效,即使是大的数据分页也不会导致效率变低。该方法执行时间稳定为 0.05s
1mysql> explain
2select students.id,students.user_id,students_seller.seller_id,students.stu_city
3from students
4join students_seller on students.id=students_seller.student_id
5where students.created_at>='2021-04-05'
6 and students.created_at<='2021-04-05 23:59:59'
7 and students_seller.state=0
8order by students.updated_at desc
9limit 100;
10+----+-------------+-----------------+-------+------------------------------+------------+---------+-------------------+-------+------------------------------------+
11| id | select_type | table | type | possible_keys | key | key_len | ref | rows | Extra |
12+----+-------------+-----------------+-------+------------------------------+------------+---------+-------------------+-------+------------------------------------+
13| 1 | SIMPLE | students | index | PRIMARY,created_at | updated_at | 6 | NULL | 28608 | Using where |
14| 1 | SIMPLE | students_seller | ref | student_id,idx_stu_sel_state | student_id | 5 | forge.students.id | 1 | Using index condition; Using where |
15+----+-------------+-----------------+-------+------------------------------+------------+---------+-------------------+-------+------------------------------------+
162 rows in set (0.08 sec)
17说明:查询是想取得学生注册时间在 2021-04-05 的,并且 state=0 的,按照更新时间取得最近 100 条学生记录。
18请注意解释计划中得 key 为 updated_at 字段,我们明明是对 created_at 字段做范围限制,为啥 MySQL 选择走 updated_at 列索引?
4.但如果一致没有取到满足 where 条件的 100 条结果,会一直循环操作直至遍历 students 整个表!那这个代价是非常恐怖的
1mysql> explain
2select students.id,students.user_id,students_seller.seller_id,students.stu_city
3from students
4join students_seller on students.id=students_seller.student_id
5where students.created_at>='2021-04-05'
6 and students.created_at<='2021-04-05 23:59:59'
7 and students_seller.state=0
8order by students.created_at desc
9limit 100;
10+----+-------------+-----------------+-------+------------------------------+------------+---------+-------------------+--------+------------------------------------+
11| id | select_type | table | type | possible_keys | key | key_len | ref | rows | Extra |
12+----+-------------+-----------------+-------+------------------------------+------------+---------+-------------------+--------+------------------------------------+
13| 1 | SIMPLE | students | range | PRIMARY,created_at | created_at | 6 | NULL | 184782 | Using index condition |
14| 1 | SIMPLE | students_seller | ref | student_id,idx_stu_sel_state | student_id | 5 | forge.students.id | 1 | Using index condition; Using where |
15+----+-------------+-----------------+-------+------------------------------+------------+---------+-------------------+--------+------------------------------------+
162 rows in set (0.09 sec)
17说明:排序字段修改为与查询字段一致即 students.created_at,由于索引查询本身就是排序的,不必再额外排序,效率非常高。
1mysql> explain
2select t.*
3from
4(select students.id,students.user_id,students_seller.seller_id,students.stu_city,students.updated_at
5from students
6join students_seller on students.id=students_seller.student_id
7where students.created_at>='2021-04-05'
8 and students.created_at<='2021-04-05 23:59:59'
9 and students_seller.state=0) as t
10order by t.updated_at desc
11limit 100;
12+----+-------------+-----------------+-------+------------------------------+------------+---------+-------------------+--------+------------------------------------+
13| id | select_type | table | type | possible_keys | key | key_len | ref | rows | Extra |
14+----+-------------+-----------------+-------+------------------------------+------------+---------+-------------------+--------+------------------------------------+
15| 1 | PRIMARY | <derived2> | ALL | NULL | NULL | NULL | NULL | 184782 | Using filesort |
16| 2 | DERIVED | students | range | PRIMARY,created_at | created_at | 6 | NULL | 184782 | Using index condition |
17| 2 | DERIVED | students_seller | ref | student_id,idx_stu_sel_state | student_id | 5 | forge.students.id | 1 | Using index condition; Using where |
18+----+-------------+-----------------+-------+------------------------------+------------+---------+-------------------+--------+------------------------------------+
193 rows in set (0.08 sec)
20说明:查询根据 created_at 字段检索满足条件的记录构建为子查询。外层查询结果对子查询进行排序然后取得 100 条记录。
21此方法不会改变原始业务逻辑,如果满足条件结果集较小,效率很高;但是如果满足条件的中间结果集非常大,则查询效率也会较差
1mysql> EXPLAIN SELECT
2 count( 0 )
3FROM
4 `uke_hours`.uke_retire_record rr
5 JOIN `uke_hours`.uke_apply_refund_info ari ON rr.apply_refund_id = ari.id
6 JOIN `uke`.uke_student stu ON rr.user_id = stu.user_id
7 LEFT JOIN `forge`.students students ON students.user_id = ari.stu_user_Id
8 JOIN `zm-user`.users users ON rr.user_id = users.id
9 LEFT JOIN `forge`.sellers se ON se.user_id = rr.handel_user
10 JOIN (
11 SELECT
12 ard.apply_refund_id
13 FROM
14 `uke_hours`.uke_apply_refund_detail ard,
15 `uke`.uke_class c
16 WHERE
17 ard.prod_id = c.prod_id
18 AND c.is_deleted = 0
19 GROUP BY
20 ard.apply_refund_id
21 ) ard ON rr.apply_refund_id = ard.apply_refund_id
22WHERE
23 rr.is_new_data = 1
24 AND (
25 rr.cc_id IN ( 1071853472 )
26 OR rr.cr_id IN ( 1071853472 )
27 OR rr.team_user_id IN ( 1071853472 )
28 OR rr.apply_user_id IN ( 1071853472 ));
29+----+-------------+------------+--------+-----------------------------------------------------------------------+-----------------------------+---------+---------------------------+--------+---------------------------------+
30| id | select_type | table | type | possible_keys | key | key_len | ref | rows | Extra |
31+----+-------------+------------+--------+-----------------------------------------------------------------------+-----------------------------+---------+---------------------------+--------+---------------------------------+
32| 1 | PRIMARY | <derived2> | ALL | NULL | NULL | NULL | NULL | 145787 | NULL |
33| 1 | PRIMARY | rr | ref | idx_apply_refund_id,idx_user_id,idx_apply_user_id,idx_cc_id,idx_cr_id | idx_apply_refund_id | 8 | ard.apply_refund_id | 1 | Using where |
34| 1 | PRIMARY | ari | eq_ref | PRIMARY | PRIMARY | 8 | ard.apply_refund_id | 1 | Using where |
35| 1 | PRIMARY | se | eq_ref | sellers_user_id | sellers_user_id | 4 | uke_hours.rr.handel_user | 1 | Using where; Using index |
36| 1 | PRIMARY | stu | ref | idx_userId_isDeleted_unique | idx_userId_isDeleted_unique | 8 | uke_hours.rr.user_id | 1 | Using index |
37| 1 | PRIMARY | students | eq_ref | user_id | user_id | 4 | uke_hours.ari.stu_user_id | 1 | Using where; Using index |
38| 1 | PRIMARY | users | eq_ref | PRIMARY | PRIMARY | 4 | uke_hours.rr.user_id | 1 | Using where; Using index |
39| 2 | DERIVED | ard | ALL | idx_apply_refund_id,idx_prod_id | NULL | NULL | NULL | 145787 | Using temporary; Using filesort |
40| 2 | DERIVED | c | ref | idx_prod_version | idx_prod_version | 130 | uke_hours.ard.prod_id | 1 | Using where |
41+----+-------------+------------+--------+-----------------------------------------------------------------------+-----------------------------+---------+---------------------------+--------+---------------------------------+
429 rows in set (0.12 sec),实际执行时间 1.267s
43说明:该查询问题有二。
44其一,查询中用到子查询,id=2 的 derived 子查询走 type=all 得全表扫描,且因为 group by 语句需要走文件排序操作
45其二,查询条件使用到 or 条件,并且由于 or 的列太多,查询无法使用索引.只能基于 id=2 中子查询结果驱动整个查询
1mysql> EXPLAIN SELECT
2 count( 0 )
3FROM
4 `uke_hours`.uke_retire_record rr
5 JOIN `uke_hours`.uke_apply_refund_info ari ON rr.apply_refund_id = ari.id
6 JOIN `uke`.uke_student stu ON rr.user_id = stu.user_id
7 LEFT JOIN `forge`.students students ON students.user_id = ari.stu_user_Id
8 JOIN `zm-user`.users users ON rr.user_id = users.id
9 LEFT JOIN `forge`.sellers se ON se.user_id = rr.handel_user
10 JOIN `uke_hours`.uke_apply_refund_detail ard on rr.apply_refund_id = ard.apply_refund_id
11 join `uke`.uke_class c on ard.prod_id = c.prod_id
12 AND c.is_deleted = 0
13WHERE
14 rr.is_new_data = 1
15 AND (
16 rr.cc_id IN ( 1071853472 )
17 OR rr.cr_id IN ( 1071853472 ) );
18+----+-------------+----------+-------------+-----------------------------------------------------+-----------------------------+---------+------------------------------+------+-----------------------------------------------+
19| id | select_type | table | type | possible_keys | key | key_len | ref | rows | Extra |
20+----+-------------+----------+-------------+-----------------------------------------------------+-----------------------------+---------+------------------------------+------+-----------------------------------------------+
21| 1 | SIMPLE | rr | index_merge | idx_apply_refund_id,idx_user_id,idx_cc_id,idx_cr_id | idx_cc_id,idx_cr_id | 9,9 | NULL | 2 | Using union(idx_cc_id,idx_cr_id); Using where |
22| 1 | SIMPLE | ari | eq_ref | PRIMARY | PRIMARY | 8 | uke_hours.rr.apply_refund_id | 1 | Using where |
23| 1 | SIMPLE | se | eq_ref | sellers_user_id | sellers_user_id | 4 | uke_hours.rr.handel_user | 1 | Using where; Using index |
24| 1 | SIMPLE | ard | ref | idx_apply_refund_id,idx_prod_id | idx_apply_refund_id | 8 | uke_hours.rr.apply_refund_id | 1 | NULL |
25| 1 | SIMPLE | c | ref | idx_prod_version | idx_prod_version | 130 | uke_hours.ard.prod_id | 1 | Using where |
26| 1 | SIMPLE | users | eq_ref | PRIMARY | PRIMARY | 4 | uke_hours.rr.user_id | 1 | Using where; Using index |
27| 1 | SIMPLE | stu | ref | idx_userId_isDeleted_unique | idx_userId_isDeleted_unique | 8 | uke_hours.rr.user_id | 1 | Using index |
28| 1 | SIMPLE | students | eq_ref | user_id | user_id | 4 | uke_hours.ari.stu_user_id | 1 | Using where; Using index |
29+----+-------------+----------+-------------+-----------------------------------------------------+-----------------------------+---------+------------------------------+------+-----------------------------------------------+
308 rows in set (0.08 sec),实际执行时间 0.033s
31说明:改写后 where 查询中只保留 cc_id 和 cr_id ,由于都是 in 的等值查询,且列上均有索引条件,查询可以使用 index_merge 的优化特性,扫描行数为 2
32
33并且查询将子查询写法替换为 join ,使得原本子查询中的全表扫描,变为普通的表关联,查询效率得到极大提升。优化效率 38 倍
1mysql> EXPLAIN
2SELECT
3 ss.id,
4 ss.user_id userId,
5 ss.scan_state scanCode,
6 ss.bu,
7 ss.source
8FROM
9 t_screenshot ss
10 LEFT JOIN t_screenshot_analyze sa ON ss.id = sa.screenshot_id
11WHERE
12 ss.audit_state = 0
13 AND ss.re_upload_time IS NULL
14 AND ss.bu IN ( 3, 5, 4, 11, 12, 1, 2 )
15 AND ((
16 ss.re_apply_time IS NOT NULL
17 AND ss.re_apply_time >= '2021-05-14 11:30:00.145'
18 AND ss.re_apply_time <= '2021-05-21 10:30:00.145' )
19 OR ( ss.re_apply_time IS NULL
20 AND ss.upload_time >= '2021-05-14 11:30:00.145'
21 AND ss.upload_time <= '2021-05-21 10:30:00.145'
22 ))
23 AND (
24 sa.poster_tag != 2
25 OR sa.poster_tag IS NULL);
26+----+-------------+-------+--------+------------------------------------------------------+--------------------+---------+--------------------+---------+------------------------------------+
27| id | select_type | table | type | possible_keys | key | key_len | ref | rows | Extra |
28+----+-------------+-------+--------+------------------------------------------------------+--------------------+---------+--------------------+---------+------------------------------------+
29| 1 | SIMPLE | ss | ref | idx_create_time,idx_re_upload_time,idx_re_apply_time | idx_re_upload_time | 6 | const | 1393993 | Using index condition; Using where |
30| 1 | SIMPLE | sa | eq_ref | PRIMARY | PRIMARY | 8 | market_audit.ss.id | 1 | Using where |
31+----+-------------+-------+--------+------------------------------------------------------+--------------------+---------+--------------------+---------+------------------------------------+
322 rows in set (0.07 sec)
33说明:看解释计划似乎查询是走到 type=ref 的非唯一索引扫描,实则查询只能根据 ss.re_upload_time IS NULL 这一个条件做简单的非空过滤
34按照表数据的组成,无异于全表扫描。该查询执行时间 4.87s
1mysql> EXPLAIN
2SELECT
3 ss.id,
4 ss.user_id userId,
5 ss.scan_state scanCode,
6 ss.bu,
7 ss.source
8FROM
9 t_screenshot ss
10 LEFT JOIN t_screenshot_analyze sa ON ss.id = sa.screenshot_id
11WHERE
12 ss.audit_state = 0
13 AND ss.re_upload_time IS NULL
14 AND ss.bu IN ( 3, 5, 4, 11, 12, 1, 2 )
15
16 AND ss.re_apply_time IS NOT NULL
17 AND ss.re_apply_time >= '2021-05-14 11:30:00.145'
18 AND ss.re_apply_time <= '2021-05-21 10:30:00.145'
19
20 AND (
21 sa.poster_tag != 2
22 OR sa.poster_tag IS NULL)
23union
24SELECT
25 ss.id,
26 ss.user_id userId,
27 ss.scan_state scanCode,
28 ss.bu,
29 ss.source
30FROM
31 t_screenshot ss
32 LEFT JOIN t_screenshot_analyze sa ON ss.id = sa.screenshot_id
33WHERE
34 ss.audit_state = 0
35 AND ss.re_upload_time IS NULL
36 AND ss.bu IN ( 3, 5, 4, 11, 12, 1, 2 )
37
38 AND ss.re_apply_time IS NULL
39 AND ss.upload_time >= '2021-05-14 11:30:00.145'
40 AND ss.upload_time <= '2021-05-21 10:30:00.145'
41 AND (
42 sa.poster_tag != 2
43 OR sa.poster_tag IS NULL);
44+------+--------------+------------+--------+------------------------------------------------------+-------------------+---------+--------------------+--------+------------------------------------+
45| id | select_type | table | type | possible_keys | key | key_len | ref | rows | Extra |
46+------+--------------+------------+--------+------------------------------------------------------+-------------------+---------+--------------------+--------+------------------------------------+
47| 1 | PRIMARY | ss | range | idx_re_upload_time,idx_re_apply_time | idx_re_apply_time | 6 | NULL | 137 | Using index condition; Using where |
48| 1 | PRIMARY | sa | eq_ref | PRIMARY | PRIMARY | 8 | market_audit.ss.id | 1 | Using where |
49| 2 | UNION | ss | range | idx_create_time,idx_re_upload_time,idx_re_apply_time | idx_create_time | 5 | NULL | 178928 | Using index condition; Using where |
50| 2 | UNION | sa | eq_ref | PRIMARY | PRIMARY | 8 | market_audit.ss.id | 1 | Using where |
51| NULL | UNION RESULT | <union1,2> | ALL | NULL | NULL | NULL | NULL | NULL | Using temporary |
52+------+--------------+------------+--------+------------------------------------------------------+-------------------+---------+--------------------+--------+------------------------------------+
535 rows in set (0.20 sec)
54说明:将 or 连接的条件改为 union 连接,改写后语句长度增加一倍。
55但是注意到 union 连接的两个子查询,都能根据对应的输入参数(upload_time 和 re_apply_time ),进行索引 range 扫描。
56基于索引范围的扫描,效率提升很多。该查询执行时间为 0.245s,优化效率提升 20 倍
1mysql> explain
2select
3 ss.student_id as studentId,
4 ss.pre_seller_id as preSellerId,
5 ss.state as state
6from
7 students_seller ss force index(PRIMARY)
8where
9 ss.bu = 0
10 and ss.seller_id is null
11order by
12 ss.id asc
13limit
14 115000, 5000;
15+----+-------------+-------+-------+---------------+---------+---------+------+--------+-------------+
16| id | select_type | table | type | possible_keys | key | key_len | ref | rows | Extra |
17+----+-------------+-------+-------+---------------+---------+---------+------+--------+-------------+
18| 1 | SIMPLE | ss | index | NULL | PRIMARY | 4 | NULL | 120000 | Using where |
19+----+-------------+-------+-------+---------------+---------+---------+------+--------+-------------+
201 row in set (0.05 sec)
21说明:这条 SQL 的逻辑很简单,就是按照 bu=0 和 seller_id is null 作为条件,取得 students_seller 表所有满足条件的记录。
22由于满足要求的记录较多,该查询可能会循环执行几百次。从慢 SQL 记录中可以看到 2021-05-21 日该查询执行了 721 次,总执行时长 11670s,总扫描行数 363 亿行,返回 360 万行
23注意:limit 这种写法,解析行数会随着 limit m,n 后的 m 数增加导致解析行数持续增加,查询变慢。
1如:上一次循环取到的students_seller表的最大id为102378346
2mysql> explain
3 -> select
4 ss.student_id as studentId,
5 ss.pre_seller_id as preSellerId,
6 ss.state as state
7from
8 students_seller ss force index(primary)
9where
10 ss.bu = 0
11 and ss.seller_id is null
12 and id>=102378346
13order by ss.id asc
14limit 5000;
15+----+-------------+-------+-------+---------------+---------+---------+------+----------+-------------+
16| id | select_type | table | type | possible_keys | key | key_len | ref | rows | Extra |
17+----+-------------+-------+-------+---------------+---------+---------+------+----------+-------------+
18| 1 | SIMPLE | ss | range | PRIMARY | PRIMARY | 4 | NULL | 45237438 | Using where |
19+----+-------------+-------+-------+---------------+---------+---------+------+----------+-------------+
201 row in set (0.07 sec)
21说明:按照 id 范围限制后,每次查询时间均非常快速,效率很高。
22按此方式优化后,每次查询时间稳定为 0.15s,全部扫描 721 次,总执行时长为 108.15s,效率提升 108 倍
Web
前端/ Java
/ iOS
/ 安卓 )、测试工程师( 功能/自动化/性能 )、DBA
、大数据工程师、算法工程师( OCR
/用户画像/推荐 )、K8s
架构师、运维工程师、产品经理、安全工程师、网络工程师。欢迎加入掌门教育大家庭,一起畅谈技术,分享交流。