2013-08-07 星期三 下午
---------------研究联合索引---------------------
SQL> conn hr/hr
Connected.
SQL> create table test1 as select * from all_objects;
Table created.
SQL> create index ind_id_typ on test1(object_id,object_type); --创建联合索引
Index created.
SQL> exec dbms_stats.gather_table_stats(user,'test1',cascade=>true);
PL/SQL procedure successfully completed.
SQL> create index ind_id on test1(object_id); --创建单字段索引
Index created.
SQL> create index ind_type on test1(object_type); --创建单字段索引
Index created.
SQL> exec dbms_stats.gather_table_stats(user,'test1',cascade=>true);
PL/SQL procedure successfully completed.
SQL> select * from test1 where object_id=28;
no rows selected
Execution Plan
----------------------------------------------------------
Plan hash value: 1747753053
--------------------------------------------------------------------------------------
| Id | Operation | Name | Rows | Bytes | Cost (%CPU)| Time |
--------------------------------------------------------------------------------------
| 0 | SELECT STATEMENT | | 1 | 95 | 2 (0)| 00:00:01 |
| 1 | TABLE ACCESS BY INDEX ROWID| TEST1 | 1 | 95 | 2 (0)| 00:00:01 |
|* 2 | INDEX RANGE SCAN | IND_ID | 1 | | 1 (0)| 00:00:01 |
--------------------------------------------------------------------------------------
Predicate Information (identified by operation id):
---------------------------------------------------
2 - access("OBJECT_ID"=28)
Statistics
----------------------------------------------------------
1 recursive calls
0 db block gets
2 consistent gets
0 physical reads
0 redo size
995 bytes sent via SQL*Net to client
389 bytes received via SQL*Net from client
1 SQL*Net roundtrips to/from client
0 sorts (memory)
0 sorts (disk)
0 rows processed
当联合索引和单字段索引同时存在的话,优先选择单一索引,因为扫的叶子块字节数会少。
SQL> drop index ind_id;
Index dropped.
SQL> exec dbms_stats.gather_table_stats(user,'test1',cascade=>true);
PL/SQL procedure successfully completed.
SQL> select * from test1 where object_id=28;
no rows selected
Execution Plan
----------------------------------------------------------
Plan hash value: 1725600915
------------------------------------------------------------------------------------------
| Id | Operation | Name | Rows | Bytes | Cost (%CPU)| Time |
------------------------------------------------------------------------------------------
| 0 | SELECT STATEMENT | | 1 | 95 | 3 (0)| 00:00:01 |
| 1 | TABLE ACCESS BY INDEX ROWID| TEST1 | 1 | 95 | 3 (0)| 00:00:01 |
|* 2 | INDEX RANGE SCAN | IND_ID_TYP | 1 | | 2 (0)| 00:00:01 |
------------------------------------------------------------------------------------------
Predicate Information (identified by operation id):
---------------------------------------------------
2 - access("OBJECT_ID"=28)
Statistics
----------------------------------------------------------
1 recursive calls
0 db block gets
2 consistent gets
0 physical reads
0 redo size
995 bytes sent via SQL*Net to client
389 bytes received via SQL*Net from client
1 SQL*Net roundtrips to/from client
0 sorts (memory)
0 sorts (disk)
0 rows processed
查询是走联合索引的。
如果where条件落在联合索引的第二列上,是否会走索引?
SQL> select * from test1 where object_type='JOB';
no rows selected
Execution Plan
----------------------------------------------------------
Plan hash value: 3495195170
----------------------------------------------------------------------------------------
| Id | Operation | Name | Rows | Bytes | Cost (%CPU)| Time |
----------------------------------------------------------------------------------------
| 0 | SELECT STATEMENT | | 2924 | 271K| 101 (0)| 00:00:02 |
| 1 | TABLE ACCESS BY INDEX ROWID| TEST1 | 2924 | 271K| 101 (0)| 00:00:02 |
|* 2 | INDEX RANGE SCAN | IND_TYPE | 2924 | | 9 (0)| 00:00:01 |
----------------------------------------------------------------------------------------
Predicate Information (identified by operation id):
---------------------------------------------------
2 - access("OBJECT_TYPE"='JOB') --因为有个单字段索引,所以优先选择。
SQL> drop index ind_type;
Index dropped.
SQL> exec dbms_stats.gather_table_stats(user,'test1',cascade=>true);
PL/SQL procedure successfully completed.
SQL> select * from test1 where object_type='JOB';
no rows selected
Execution Plan
----------------------------------------------------------
Plan hash value: 4122059633
---------------------------------------------------------------------------
| Id | Operation | Name | Rows | Bytes | Cost (%CPU)| Time |
---------------------------------------------------------------------------
| 0 | SELECT STATEMENT | | 5 | 475 | 133 (2)| 00:00:02 |
|* 1 | TABLE ACCESS FULL| TEST1 | 5 | 475 | 133 (2)| 00:00:02 |
---------------------------------------------------------------------------
Predicate Information (identified by operation id):
---------------------------------------------------
1 - filter("OBJECT_TYPE"='JOB')
此时没有用索引:
联合索引:id + type +rowid,id,type都参与排序,但是type是在ID组内排序的,
大体上来看,type的值是没有顺序的,是散乱的,而且root阶段和分支节点存储的值范围是按照第一个字段id来的,
索引当where条件中出现type的时候,是用不到联合索引的。
两外需要注意的是,联合索引的第一列要求重复率比较低效果才很明显。
---------------------------------------------
创建位图索引应用的案例
create table test(gender not null,location not null,age_group not null,data)
as select decode(ceil(dbms_random.value(0,2)),'1','M','2','F') gender,
ceil(dbms_random.value(1,50)) location,
decode(ceil(dbms_random.value(0,5)),'1','18 and under','2','19-25','3','26-30','4','31-50','5','41 and over') age_group,
rpad('*',20,'*') data from t2;
SQL> select count(1) from test;
COUNT(1)
----------
1000000
需求1:
select count(1) from test where gender='M' and location in(1,10,30) and age_group='41 and over';
需求2:
select count(1) from test where (gender='M' and location in(1,10,30) or gender='F' and location=22) and age_group='18 and under'
需求3:
select count(1) from test where location in(1,10,30);
需求4:
select count(1) from test where age_group='41 and over' and gender='F'
分析怎样规划索引能最大限度的满足上面四个需求的实现?
where条件中过滤字段重复率是很高的,如果用B树索引做的话:
方案一(建立B树索引):
1、gender、location、age_group三个字段上作联合索引
SQL> create index ind_g_l_a on test(gender,location,age_group); 满足需求1和需求2
Index created.
SQL> exec dbms_stats.gather_table_stats(user,'test',cascade=>true,estimate_percent=>100);
PL/SQL procedure successfully completed.
需求1:
SQL> select count(1) from test where gender='M' and location in(1,10,30) and age_group='41 and over';
这个索引用的比较好的原因是,gender只有两组,只要过滤了其中的一组,location就是升序排列的,
就可以用索引的。
Execution Plan
----------------------------------------------------------
Plan hash value: 2282520444
--------------------------------------------------------------------------------
| Id | Operation | Name | Rows | Bytes | Cost (%CPU)| Time |
--------------------------------------------------------------------------------
| 0 | SELECT STATEMENT | | 1 | 14 | 23 (0)| 00:00:01 |
| 1 | SORT AGGREGATE | | 1 | 14 | | |
| 2 | INLIST ITERATOR | | | | | |
|* 3 | INDEX RANGE SCAN| IND_G_L_A | 6080 | 85120 | 23 (0)| 00:00:01 |
--------------------------------------------------------------------------------
Predicate Information (identified by operation id):
---------------------------------------------------
3 - access("GENDER"='M' AND ("LOCATION"=1 OR "LOCATION"=10 OR
"LOCATION"=30) AND "AGE_GROUP"='41 and over')
需求2:
SQL> select count(1) from test where (gender='M' and location in(1,10,30) or gender='F' and location=22) and age_group='18 and under';
这个索引用的比较好的原因是,gender只有两组,只要过滤了其中的一组,location就是升序排列的,
就可以用索引的。
Execution Plan
----------------------------------------------------------
Plan hash value: 1740203256
---------------------------------------------------------------------------------
| Id | Operation | Name | Rows | Bytes | Cost (%CPU)| Time |
---------------------------------------------------------------------------------
| 0 | SELECT STATEMENT | | 1 | 14 | 33 (0)| 00:00:01 |
| 1 | SORT AGGREGATE | | 1 | 14 | | |
| 2 | CONCATENATION | | | | | |
|* 3 | INDEX RANGE SCAN | IND_G_L_A | 2041 | 28574 | 10 (0)| 00:00:01 |
| 4 | INLIST ITERATOR | | | | | |
|* 5 | INDEX RANGE SCAN| IND_G_L_A | 6018 | 84252 | 23 (0)| 00:00:01 |
---------------------------------------------------------------------------------
Predicate Information (identified by operation id):
---------------------------------------------------
3 - access("GENDER"='F' AND "LOCATION"=22 AND "AGE_GROUP"='18 and
under')
5 - access("GENDER"='M' AND ("LOCATION"=1 OR "LOCATION"=10 OR
"LOCATION"=30) AND "AGE_GROUP"='18 and under')
filter(LNNVL("LOCATION"=22) OR LNNVL("GENDER"='F'))
需求3:
SQL> select count(1) from test where location in(1,10,30); --虽然用了索引,但是不是用的很好,代价比较高。
原因是where条件没有落在索引的第一个字段上,由于location排列是散乱的,所以全扫了索引。没有分组。
Execution Plan
----------------------------------------------------------
Plan hash value: 1529956083
-----------------------------------------------------------------------------------
| Id | Operation | Name | Rows | Bytes | Cost (%CPU)| Time |
-----------------------------------------------------------------------------------
| 0 | SELECT STATEMENT | | 1 | 3 | 793 (6)| 00:00:10 |
| 1 | SORT AGGREGATE | | 1 | 3 | | |
|* 2 | INDEX FAST FULL SCAN| IND_G_L_A | 60799 | 178K| 793 (6)| 00:00:10 |
-----------------------------------------------------------------------------------
Predicate Information (identified by operation id):
---------------------------------------------------
2 - filter("LOCATION"=1 OR "LOCATION"=10 OR "LOCATION"=30)
需求4:
SQL> select count(1) from test where age_group='41 and over' and gender='F'; --代价相对较高
虽然也分组了,如果where落在在location,肯定用索引的,但是,落在了age_group,此时第三列是散乱的。
Execution Plan
----------------------------------------------------------
Plan hash value: 311740366
------------------------------------------------------------------------------
| Id | Operation | Name | Rows | Bytes | Cost (%CPU)| Time |
------------------------------------------------------------------------------
| 0 | SELECT STATEMENT | | 1 | 11 | 409 (4)| 00:00:05 |
| 1 | SORT AGGREGATE | | 1 | 11 | | |
|* 2 | INDEX SKIP SCAN| IND_G_L_A | 100K| 1074K| 409 (4)| 00:00:05 |
------------------------------------------------------------------------------
Predicate Information (identified by operation id):
---------------------------------------------------
2 - access("GENDER"='F' AND "AGE_GROUP"='41 and over')
filter("AGE_GROUP"='41 and over')
Statistics
----------------------------------------------------------
1 recursive calls
0 db block gets
450 consistent gets
0 physical reads
0 redo size
413 bytes sent via SQL*Net to client
400 bytes received via SQL*Net from client
2 SQL*Net roundtrips to/from client
0 sorts (memory)
0 sorts (disk)
1 rows processed
create index ind_g_l on test(gender,location);仅仅能满足1,2
create index ind_a_l on test(age_group,location);满足4
create index ind_l_g_a on test(location,gender,age_group); 满足1,2,3
-------------------------------------------------------
--=方案二:创建位图索引
SQL> drop index ind_g_l_a;
Index dropped.
SQL> create bitmap index gender_idx on test(gender);
Index created.
SQL> create bitmap index location_idx on test(location);
Index created.
SQL> create bitmap index age_group_idx on test(age_group);
Index created.
SQL> exec dbms_stats.gather_table_stats(user,'test',cascade=>true,estimate_percent=>100);
PL/SQL procedure successfully completed.
需求1:
SQL> select count(1) from test where gender='M' and location in(1,10,30) and age_group='41 and over';
Execution Plan
----------------------------------------------------------
Plan hash value: 320981916
-----------------------------------------------------------------------------------------------
| Id | Operation | Name | Rows | Bytes | Cost (%CPU)| Time |
-----------------------------------------------------------------------------------------------
| 0 | SELECT STATEMENT | | 1 | 14 | 53 (0)| 00:00:01 |
| 1 | SORT AGGREGATE | | 1 | 14 | | |
| 2 | BITMAP CONVERSION COUNT | | 5036 | 70504 | 53 (0)| 00:00:01 |
| 3 | BITMAP AND | | | | | |
| 4 | BITMAP OR | | | | | |
|* 5 | BITMAP INDEX SINGLE VALUE| LOCATION_IDX | | | | |
|* 6 | BITMAP INDEX SINGLE VALUE| LOCATION_IDX | | | | |
|* 7 | BITMAP INDEX SINGLE VALUE| LOCATION_IDX | | | | |
|* 8 | BITMAP INDEX SINGLE VALUE | AGE_GROUP_IDX | | | | |
|* 9 | BITMAP INDEX SINGLE VALUE | GENDER_IDX | | | | |
-----------------------------------------------------------------------------------------------
Predicate Information (identified by operation id):
---------------------------------------------------
5 - access("LOCATION"=1)
6 - access("LOCATION"=10)
7 - access("LOCATION"=30)
8 - access("AGE_GROUP"='41 and over')
9 - access("GENDER"='M')
需求2:
SQL> select count(1) from test where (gender='M' and location in(1,10,30) or gender='F' and location=22) and age_group='18 and under';
Execution Plan
----------------------------------------------------------
Plan hash value: 809694946
-------------------------------------------------------------------------------------------------
| Id | Operation | Name | Rows | Bytes | Cost (%CPU)| Time |
-------------------------------------------------------------------------------------------------
| 0 | SELECT STATEMENT | | 1 | 14 | 68 (0)| 00:00:01 |
| 1 | SORT AGGREGATE | | 1 | 14 | | |
| 2 | BITMAP CONVERSION COUNT | | 7078 | 99092 | 68 (0)| 00:00:01 |
| 3 | BITMAP AND | | | | | |
|* 4 | BITMAP INDEX SINGLE VALUE | AGE_GROUP_IDX | | | | |
| 5 | BITMAP OR | | | | | |
| 6 | BITMAP AND | | | | | |
| 7 | BITMAP OR | | | | | |
|* 8 | BITMAP INDEX SINGLE VALUE| LOCATION_IDX | | | | |
|* 9 | BITMAP INDEX SINGLE VALUE| LOCATION_IDX | | | | |
|* 10 | BITMAP INDEX SINGLE VALUE| LOCATION_IDX | | | | |
|* 11 | BITMAP INDEX SINGLE VALUE | GENDER_IDX | | | | |
| 12 | BITMAP AND | | | | | |
|* 13 | BITMAP INDEX SINGLE VALUE | LOCATION_IDX | | | | |
|* 14 | BITMAP INDEX SINGLE VALUE | GENDER_IDX | | | | |
-------------------------------------------------------------------------------------------------
Predicate Information (identified by operation id):
---------------------------------------------------
4 - access("AGE_GROUP"='18 and under')
8 - access("LOCATION"=1)
9 - access("LOCATION"=10)
10 - access("LOCATION"=30)
11 - access("GENDER"='M')
13 - access("LOCATION"=22)
14 - access("GENDER"='F')
需求3:
SQL> select count(1) from test where location in(1,10,30);
Execution Plan
----------------------------------------------------------
Plan hash value: 2259268895
---------------------------------------------------------------------------------------------
| Id | Operation | Name | Rows | Bytes | Cost (%CPU)| Time |
---------------------------------------------------------------------------------------------
| 0 | SELECT STATEMENT | | 1 | 3 | 13 (0)| 00:00:01 |
| 1 | SORT AGGREGATE | | 1 | 3 | | |
| 2 | INLIST ITERATOR | | | | | |
| 3 | BITMAP CONVERSION COUNT | | 50540 | 148K| 13 (0)| 00:00:01 |
|* 4 | BITMAP INDEX SINGLE VALUE| LOCATION_IDX | | | | |
---------------------------------------------------------------------------------------------
Predicate Information (identified by operation id):
---------------------------------------------------
4 - access("LOCATION"=1 OR "LOCATION"=10 OR "LOCATION"=30)
需求4:
SQL> select count(1) from test where age_group='41 and over' and gender='F';
Execution Plan
----------------------------------------------------------
Plan hash value: 2381702022
----------------------------------------------------------------------------------------------
| Id | Operation | Name | Rows | Bytes | Cost (%CPU)| Time |
----------------------------------------------------------------------------------------------
| 0 | SELECT STATEMENT | | 1 | 11 | 40 (0)| 00:00:01 |
| 1 | SORT AGGREGATE | | 1 | 11 | | |
| 2 | BITMAP CONVERSION COUNT | | 99957 | 1073K| 40 (0)| 00:00:01 |
| 3 | BITMAP AND | | | | | |
|* 4 | BITMAP INDEX SINGLE VALUE| AGE_GROUP_IDX | | | | |
|* 5 | BITMAP INDEX SINGLE VALUE| GENDER_IDX | | | | |
----------------------------------------------------------------------------------------------
Predicate Information (identified by operation id):
---------------------------------------------------
4 - access("AGE_GROUP"='41 and over')
5 - access("GENDER"='F')
**************************************************************
结论:
综上实验来看,从全局考虑,创建位图索引要优于B树索引。
创建索引的技术思路:
1、第一个方面:技术场景
2、第二个方面:业务需求(转化为实现的SQL)场景
3、综合考虑方案的选择,既要总体上满足现有的需求,而且要兼顾需求的扩展,
综合考虑整体的性能,要顾全大局。
-----------------------------------