2013-08-07  星期三 下午

---------------研究联合索引---------------------

SQL> conn hr/hr

Connected.

SQL> create table test1 as select * from all_objects;

Table created.

SQL> create index ind_id_typ on test1(object_id,object_type);  --创建联合索引

Index created.

SQL> exec dbms_stats.gather_table_stats(user,'test1',cascade=>true);

PL/SQL procedure successfully completed.

SQL> create index ind_id on test1(object_id);  --创建单字段索引

Index created.

SQL> create index ind_type on test1(object_type);  --创建单字段索引

Index created.

SQL> exec dbms_stats.gather_table_stats(user,'test1',cascade=>true);

PL/SQL procedure successfully completed.

SQL> select * from test1 where object_id=28;

no rows selected

Execution Plan

----------------------------------------------------------

Plan hash value: 1747753053

--------------------------------------------------------------------------------------

| Id  | Operation                   | Name   | Rows  | Bytes | Cost (%CPU)| Time     |

--------------------------------------------------------------------------------------

|   0 | SELECT STATEMENT            |        |     1 |    95 |     2   (0)| 00:00:01 |

|   1 |  TABLE ACCESS BY INDEX ROWID| TEST1  |     1 |    95 |     2   (0)| 00:00:01 |

|*  2 |   INDEX RANGE SCAN          | IND_ID |     1 |       |     1   (0)| 00:00:01 |

--------------------------------------------------------------------------------------

Predicate Information (identified by operation id):

---------------------------------------------------

  2 - access("OBJECT_ID"=28)

Statistics

----------------------------------------------------------

         1  recursive calls

         0  db block gets

         2  consistent gets

         0  physical reads

         0  redo size

       995  bytes sent via SQL*Net to client

       389  bytes received via SQL*Net from client

         1  SQL*Net roundtrips to/from client

         0  sorts (memory)

         0  sorts (disk)

         0  rows processed

当联合索引和单字段索引同时存在的话,优先选择单一索引,因为扫的叶子块字节数会少。

SQL> drop index ind_id;

Index dropped.

SQL> exec dbms_stats.gather_table_stats(user,'test1',cascade=>true);

PL/SQL procedure successfully completed.

SQL> select * from test1 where object_id=28;

no rows selected

Execution Plan

----------------------------------------------------------

Plan hash value: 1725600915

------------------------------------------------------------------------------------------

| Id  | Operation                   | Name       | Rows  | Bytes | Cost (%CPU)| Time     |

------------------------------------------------------------------------------------------

|   0 | SELECT STATEMENT            |            |     1 |    95 |     3   (0)| 00:00:01 |

|   1 |  TABLE ACCESS BY INDEX ROWID| TEST1      |     1 |    95 |     3   (0)| 00:00:01 |

|*  2 |   INDEX RANGE SCAN          | IND_ID_TYP |     1 |       |     2   (0)| 00:00:01 |

------------------------------------------------------------------------------------------

Predicate Information (identified by operation id):

---------------------------------------------------

  2 - access("OBJECT_ID"=28)

Statistics

----------------------------------------------------------

         1  recursive calls

         0  db block gets

         2  consistent gets

         0  physical reads

         0  redo size

       995  bytes sent via SQL*Net to client

       389  bytes received via SQL*Net from client

         1  SQL*Net roundtrips to/from client

         0  sorts (memory)

         0  sorts (disk)

         0  rows processed

查询是走联合索引的。

如果where条件落在联合索引的第二列上,是否会走索引?

SQL> select * from test1 where object_type='JOB';

no rows selected

Execution Plan

----------------------------------------------------------

Plan hash value: 3495195170

----------------------------------------------------------------------------------------

| Id  | Operation                   | Name     | Rows  | Bytes | Cost (%CPU)| Time     |

----------------------------------------------------------------------------------------

|   0 | SELECT STATEMENT            |          |  2924 |   271K|   101   (0)| 00:00:02 |

|   1 |  TABLE ACCESS BY INDEX ROWID| TEST1    |  2924 |   271K|   101   (0)| 00:00:02 |

|*  2 |   INDEX RANGE SCAN          | IND_TYPE |  2924 |       |     9   (0)| 00:00:01 |

----------------------------------------------------------------------------------------

Predicate Information (identified by operation id):

---------------------------------------------------

  2 - access("OBJECT_TYPE"='JOB')  --因为有个单字段索引,所以优先选择。

SQL> drop index ind_type;

Index dropped.

SQL> exec dbms_stats.gather_table_stats(user,'test1',cascade=>true);

PL/SQL procedure successfully completed.

SQL> select * from test1 where object_type='JOB';

no rows selected

Execution Plan

----------------------------------------------------------

Plan hash value: 4122059633

---------------------------------------------------------------------------

| Id  | Operation         | Name  | Rows  | Bytes | Cost (%CPU)| Time     |

---------------------------------------------------------------------------

|   0 | SELECT STATEMENT  |       |     5 |   475 |   133   (2)| 00:00:02 |

|*  1 |  TABLE ACCESS FULL| TEST1 |     5 |   475 |   133   (2)| 00:00:02 |

---------------------------------------------------------------------------

Predicate Information (identified by operation id):

---------------------------------------------------

  1 - filter("OBJECT_TYPE"='JOB')

此时没有用索引:

联合索引:id + type +rowid,id,type都参与排序,但是type是在ID组内排序的,

大体上来看,type的值是没有顺序的,是散乱的,而且root阶段和分支节点存储的值范围是按照第一个字段id来的,

索引当where条件中出现type的时候,是用不到联合索引的。

两外需要注意的是,联合索引的第一列要求重复率比较低效果才很明显。

---------------------------------------------

创建位图索引应用的案例

create table test(gender not null,location not null,age_group not null,data)

as select decode(ceil(dbms_random.value(0,2)),'1','M','2','F') gender,

ceil(dbms_random.value(1,50)) location,

decode(ceil(dbms_random.value(0,5)),'1','18 and under','2','19-25','3','26-30','4','31-50','5','41 and over') age_group,

rpad('*',20,'*') data from t2;

SQL> select count(1) from test;

 COUNT(1)

----------

  1000000

需求1:

select count(1) from test where gender='M' and location in(1,10,30) and age_group='41 and over';

需求2:

select count(1) from test where (gender='M' and location in(1,10,30) or gender='F' and location=22) and age_group='18 and under'

需求3:

select count(1) from test where location in(1,10,30);

需求4:

select count(1) from test where age_group='41 and over' and gender='F'

分析怎样规划索引能最大限度的满足上面四个需求的实现?

where条件中过滤字段重复率是很高的,如果用B树索引做的话:

方案一(建立B树索引):

1、gender、location、age_group三个字段上作联合索引

SQL> create index ind_g_l_a on test(gender,location,age_group); 满足需求1和需求2

Index created.

SQL> exec dbms_stats.gather_table_stats(user,'test',cascade=>true,estimate_percent=>100);

PL/SQL procedure successfully completed.

需求1:

SQL> select count(1) from test where gender='M' and location in(1,10,30) and age_group='41 and over';

这个索引用的比较好的原因是,gender只有两组,只要过滤了其中的一组,location就是升序排列的,

就可以用索引的。

Execution Plan

----------------------------------------------------------

Plan hash value: 2282520444

--------------------------------------------------------------------------------

| Id  | Operation          | Name      | Rows  | Bytes | Cost (%CPU)| Time     |

--------------------------------------------------------------------------------

|   0 | SELECT STATEMENT   |           |     1 |    14 |    23   (0)| 00:00:01 |

|   1 |  SORT AGGREGATE    |           |     1 |    14 |            |          |

|   2 |   INLIST ITERATOR  |           |       |       |            |          |

|*  3 |    INDEX RANGE SCAN| IND_G_L_A |  6080 | 85120 |    23   (0)| 00:00:01 |

--------------------------------------------------------------------------------

Predicate Information (identified by operation id):

---------------------------------------------------

  3 - access("GENDER"='M' AND ("LOCATION"=1 OR "LOCATION"=10 OR

             "LOCATION"=30) AND "AGE_GROUP"='41 and over')

需求2:

SQL> select count(1) from test where (gender='M' and location in(1,10,30) or gender='F' and location=22) and age_group='18 and under';

这个索引用的比较好的原因是,gender只有两组,只要过滤了其中的一组,location就是升序排列的,

就可以用索引的。

Execution Plan

----------------------------------------------------------

Plan hash value: 1740203256

---------------------------------------------------------------------------------

| Id  | Operation           | Name      | Rows  | Bytes | Cost (%CPU)| Time     |

---------------------------------------------------------------------------------

|   0 | SELECT STATEMENT    |           |     1 |    14 |    33   (0)| 00:00:01 |

|   1 |  SORT AGGREGATE     |           |     1 |    14 |            |          |

|   2 |   CONCATENATION     |           |       |       |            |          |

|*  3 |    INDEX RANGE SCAN | IND_G_L_A |  2041 | 28574 |    10   (0)| 00:00:01 |

|   4 |    INLIST ITERATOR  |           |       |       |            |          |

|*  5 |     INDEX RANGE SCAN| IND_G_L_A |  6018 | 84252 |    23   (0)| 00:00:01 |

---------------------------------------------------------------------------------

Predicate Information (identified by operation id):

---------------------------------------------------

  3 - access("GENDER"='F' AND "LOCATION"=22 AND "AGE_GROUP"='18 and

             under')

  5 - access("GENDER"='M' AND ("LOCATION"=1 OR "LOCATION"=10 OR

             "LOCATION"=30) AND "AGE_GROUP"='18 and under')

      filter(LNNVL("LOCATION"=22) OR LNNVL("GENDER"='F'))

需求3:

SQL> select count(1) from test where location in(1,10,30);  --虽然用了索引,但是不是用的很好,代价比较高。

原因是where条件没有落在索引的第一个字段上,由于location排列是散乱的,所以全扫了索引。没有分组。

Execution Plan

----------------------------------------------------------

Plan hash value: 1529956083

-----------------------------------------------------------------------------------

| Id  | Operation             | Name      | Rows  | Bytes | Cost (%CPU)| Time     |

-----------------------------------------------------------------------------------

|   0 | SELECT STATEMENT      |           |     1 |     3 |   793   (6)| 00:00:10 |

|   1 |  SORT AGGREGATE       |           |     1 |     3 |            |          |

|*  2 |   INDEX FAST FULL SCAN| IND_G_L_A | 60799 |   178K|   793   (6)| 00:00:10 |

-----------------------------------------------------------------------------------

Predicate Information (identified by operation id):

---------------------------------------------------

  2 - filter("LOCATION"=1 OR "LOCATION"=10 OR "LOCATION"=30)

需求4:

SQL> select count(1) from test where age_group='41 and over' and gender='F';  --代价相对较高

虽然也分组了,如果where落在在location,肯定用索引的,但是,落在了age_group,此时第三列是散乱的。

Execution Plan

----------------------------------------------------------

Plan hash value: 311740366

------------------------------------------------------------------------------

| Id  | Operation        | Name      | Rows  | Bytes | Cost (%CPU)| Time     |

------------------------------------------------------------------------------

|   0 | SELECT STATEMENT |           |     1 |    11 |   409   (4)| 00:00:05 |

|   1 |  SORT AGGREGATE  |           |     1 |    11 |            |          |

|*  2 |   INDEX SKIP SCAN| IND_G_L_A |   100K|  1074K|   409   (4)| 00:00:05 |

------------------------------------------------------------------------------

Predicate Information (identified by operation id):

---------------------------------------------------

  2 - access("GENDER"='F' AND "AGE_GROUP"='41 and over')

      filter("AGE_GROUP"='41 and over')

Statistics

----------------------------------------------------------

         1  recursive calls

         0  db block gets

       450  consistent gets

         0  physical reads

         0  redo size

       413  bytes sent via SQL*Net to client

       400  bytes received via SQL*Net from client

         2  SQL*Net roundtrips to/from client

         0  sorts (memory)

         0  sorts (disk)

         1  rows processed

create index ind_g_l on test(gender,location);仅仅能满足1,2

create index ind_a_l on test(age_group,location);满足4

create index ind_l_g_a on test(location,gender,age_group); 满足1,2,3

-------------------------------------------------------

--=方案二:创建位图索引

SQL> drop index ind_g_l_a;

Index dropped.

SQL> create bitmap index gender_idx on test(gender);

Index created.

SQL> create bitmap index location_idx on test(location);

Index created.

SQL> create bitmap index age_group_idx on test(age_group);

Index created.

SQL> exec dbms_stats.gather_table_stats(user,'test',cascade=>true,estimate_percent=>100);

PL/SQL procedure successfully completed.

需求1:

SQL> select count(1) from test where gender='M' and location in(1,10,30) and age_group='41 and over';

Execution Plan

----------------------------------------------------------

Plan hash value: 320981916

-----------------------------------------------------------------------------------------------

| Id  | Operation                     | Name          | Rows  | Bytes | Cost (%CPU)| Time     |

-----------------------------------------------------------------------------------------------

|   0 | SELECT STATEMENT              |               |     1 |    14 |    53   (0)| 00:00:01 |

|   1 |  SORT AGGREGATE               |               |     1 |    14 |            |          |

|   2 |   BITMAP CONVERSION COUNT     |               |  5036 | 70504 |    53   (0)| 00:00:01 |

|   3 |    BITMAP AND                 |               |       |       |            |          |

|   4 |     BITMAP OR                 |               |       |       |            |          |

|*  5 |      BITMAP INDEX SINGLE VALUE| LOCATION_IDX  |       |       |            |          |

|*  6 |      BITMAP INDEX SINGLE VALUE| LOCATION_IDX  |       |       |            |          |

|*  7 |      BITMAP INDEX SINGLE VALUE| LOCATION_IDX  |       |       |            |          |

|*  8 |     BITMAP INDEX SINGLE VALUE | AGE_GROUP_IDX |       |       |            |          |

|*  9 |     BITMAP INDEX SINGLE VALUE | GENDER_IDX    |       |       |            |          |

-----------------------------------------------------------------------------------------------

Predicate Information (identified by operation id):

---------------------------------------------------

  5 - access("LOCATION"=1)

  6 - access("LOCATION"=10)

  7 - access("LOCATION"=30)

  8 - access("AGE_GROUP"='41 and over')

  9 - access("GENDER"='M')

需求2:

SQL> select count(1) from test where (gender='M' and location in(1,10,30) or gender='F' and location=22) and age_group='18 and under';

Execution Plan

----------------------------------------------------------

Plan hash value: 809694946

-------------------------------------------------------------------------------------------------

| Id  | Operation                       | Name          | Rows  | Bytes | Cost (%CPU)| Time     |

-------------------------------------------------------------------------------------------------

|   0 | SELECT STATEMENT                |               |     1 |    14 |    68   (0)| 00:00:01 |

|   1 |  SORT AGGREGATE                 |               |     1 |    14 |            |          |

|   2 |   BITMAP CONVERSION COUNT       |               |  7078 | 99092 |    68   (0)| 00:00:01 |

|   3 |    BITMAP AND                   |               |       |       |            |          |

|*  4 |     BITMAP INDEX SINGLE VALUE   | AGE_GROUP_IDX |       |       |            |          |

|   5 |     BITMAP OR                   |               |       |       |            |          |

|   6 |      BITMAP AND                 |               |       |       |            |          |

|   7 |       BITMAP OR                 |               |       |       |            |          |

|*  8 |        BITMAP INDEX SINGLE VALUE| LOCATION_IDX  |       |       |            |          |

|*  9 |        BITMAP INDEX SINGLE VALUE| LOCATION_IDX  |       |       |            |          |

|* 10 |        BITMAP INDEX SINGLE VALUE| LOCATION_IDX  |       |       |            |          |

|* 11 |       BITMAP INDEX SINGLE VALUE | GENDER_IDX    |       |       |            |          |

|  12 |      BITMAP AND                 |               |       |       |            |          |

|* 13 |       BITMAP INDEX SINGLE VALUE | LOCATION_IDX  |       |       |            |          |

|* 14 |       BITMAP INDEX SINGLE VALUE | GENDER_IDX    |       |       |            |          |

-------------------------------------------------------------------------------------------------

Predicate Information (identified by operation id):

---------------------------------------------------

  4 - access("AGE_GROUP"='18 and under')

  8 - access("LOCATION"=1)

  9 - access("LOCATION"=10)

 10 - access("LOCATION"=30)

 11 - access("GENDER"='M')

 13 - access("LOCATION"=22)

 14 - access("GENDER"='F')

需求3:

SQL> select count(1) from test where location in(1,10,30);

Execution Plan

----------------------------------------------------------

Plan hash value: 2259268895

---------------------------------------------------------------------------------------------

| Id  | Operation                    | Name         | Rows  | Bytes | Cost (%CPU)| Time     |

---------------------------------------------------------------------------------------------

|   0 | SELECT STATEMENT             |              |     1 |     3 |    13   (0)| 00:00:01 |

|   1 |  SORT AGGREGATE              |              |     1 |     3 |            |          |

|   2 |   INLIST ITERATOR            |              |       |       |            |          |

|   3 |    BITMAP CONVERSION COUNT   |              | 50540 |   148K|    13   (0)| 00:00:01 |

|*  4 |     BITMAP INDEX SINGLE VALUE| LOCATION_IDX |       |       |            |          |

---------------------------------------------------------------------------------------------

Predicate Information (identified by operation id):

---------------------------------------------------

  4 - access("LOCATION"=1 OR "LOCATION"=10 OR "LOCATION"=30)

需求4:

SQL> select count(1) from test where age_group='41 and over' and gender='F';

Execution Plan

----------------------------------------------------------

Plan hash value: 2381702022

----------------------------------------------------------------------------------------------

| Id  | Operation                    | Name          | Rows  | Bytes | Cost (%CPU)| Time     |

----------------------------------------------------------------------------------------------

|   0 | SELECT STATEMENT             |               |     1 |    11 |    40   (0)| 00:00:01 |

|   1 |  SORT AGGREGATE              |               |     1 |    11 |            |          |

|   2 |   BITMAP CONVERSION COUNT    |               | 99957 |  1073K|    40   (0)| 00:00:01 |

|   3 |    BITMAP AND                |               |       |       |            |          |

|*  4 |     BITMAP INDEX SINGLE VALUE| AGE_GROUP_IDX |       |       |            |          |

|*  5 |     BITMAP INDEX SINGLE VALUE| GENDER_IDX    |       |       |            |          |

----------------------------------------------------------------------------------------------

Predicate Information (identified by operation id):

---------------------------------------------------

  4 - access("AGE_GROUP"='41 and over')

  5 - access("GENDER"='F')

**************************************************************

结论:

综上实验来看,从全局考虑,创建位图索引要优于B树索引。

创建索引的技术思路:

1、第一个方面:技术场景

2、第二个方面:业务需求(转化为实现的SQL)场景

3、综合考虑方案的选择,既要总体上满足现有的需求,而且要兼顾需求的扩展,

   综合考虑整体的性能,要顾全大局。

-----------------------------------