Wednesday, April 29, 2009

Handle ORed Semi-Join conditions

Supposing we have a SQL using the EXISTS construct as follows:


select count(*) from t1 cbo
where exists
(select null from t2
where cbo.object_id = t2.object_id
or cbo.object_name = t2.object_name);

 
Here we have two semi-join conditions ORed together. What could be the best way to execute this kind of SQL? Here is my test case to answer this question in a 10g database.


1. Create the objects:


SQL> execute dbms_random.seed(0);

PL/SQL procedure successfully completed.

SQL>
SQL> create table t1
2 as select * from
3 ( select *
4 from all_objects
5 order by dbms_random.value
6 )
7 where rownum <=200;

Table created.

SQL>
SQL>
SQL> create table t2
2 as
3 select * from (
4 select * from all_objects order by object_type)
5 where rownum <= 10000;

Table created.

SQL>
SQL> insert into t2
2 select * from all_objects where rownum <= 10000;

10000 rows created.

SQL>
SQL> commit;

Commit complete.

SQL>
SQL> create index t2_idx on t2(object_id);

Index created.

SQL>
SQL> begin
2 dbms_stats.gather_table_stats(
3 user,
4 't1',
5 cascade => true
6 );
7 end;
8 /

PL/SQL procedure successfully completed.

SQL>
SQL>
SQL> begin
2 dbms_stats.gather_table_stats(
3 user,
4 't2',
5 cascade => true
6 );
7 end;
8 /




 

2. Oracle does not pick up semi-join access path for the questioned SQL:



SQL> select count(*) from t1 cbo
2 where exists
3 (select null from t2
4 where cbo.object_id = t2.object_id
5 or cbo.object_name = t2.object_name);

COUNT(*)
----------
103

SQL>
SQL> select * from table(dbms_xplan.display_cursor(null, null, 'allstats last outline'));

PLAN_TABLE_OUTPUT
---------------------------------------------------------
SQL_ID cuz85u83buju4, child number 0
-------------------------------------
select count(*) from t1 cbo where exists (select null from t2
where cbo.object_id = t2.object_id or cbo.object_name =
t2.object_name)

Plan hash value: 1280151244

--------------------------------------------------------------------------------------
Id Operation Name Starts E-Rows A-Rows A-Time Buffers
--------------------------------------------------------------------------------------
1 SORT AGGREGATE 1 1 1 00:00:01.95 43590
* 2 FILTER 1 103 00:00:01.95 43590
3 TABLE ACCESS FULL T1 1 200 200 00:00:00.01 5
* 4 TABLE ACCESS FULL T2 200 2 103 00:00:01.95 43585
--------------------------------------------------------------------------------------

Outline Data
-------------

/*+
BEGIN_OUTLINE_DATA
IGNORE_OPTIM_EMBEDDED_HINTS
OPTIMIZER_FEATURES_ENABLE('10.2.0.1')
ALL_ROWS
OUTLINE_LEAF(@"SEL$2")
OUTLINE_LEAF(@"SEL$1")
OUTLINE(@"SEL$2")
OUTLINE(@"SEL$1")
FULL(@"SEL$1" "CBO"@"SEL$1")
FULL(@"SEL$2" "T2"@"SEL$2")
END_OUTLINE_DATA
*/

Predicate Information (identified by operation id):
---------------------------------------------------

2 - filter( IS NOT NULL)
4 - filter(("T2"."OBJECT_NAME"=:B1 OR "T2"."OBJECT_ID"=:B2))



 

3. How about rewrite the SQL into two EXISTS constructs?
- Not much help, although buffer gets descreased from 43590 to 33834




SQL> select count(*) from t1 cbo
2 where exists (select null from t2 where cbo.object_id = t2.object_id )
3 or exists (select null from t2 where cbo.object_name = t2.object_name);

COUNT(*)
----------
103

SQL>
SQL> select * from table(dbms_xplan.display_cursor(null, null, 'allstats last outline'));

PLAN_TABLE_OUTPUT
---------------------------------------------------------
SQL_ID 2qxaf7m9c7z3r, child number 0
-------------------------------------
select count(*) from t1 cbo where exists (select null from t2 where
cbo.object_id = t2.object_id ) or exists (select null from t2 where
cbo.object_name = t2.object_name)

Plan hash value: 2450656000

------------------------------------------------------------------------------
Id Operation Name Starts E-Rows A-Rows A-Time Buffers
--------------------------------------------------------------------------------
1 SORT AGGREGATE 1 1 1 00:00:00.87 33834
* 2 FILTER 1 103 00:00:00.87 33834
3 TABLE ACCESS FULL T1 1 200 200 00:00:00.01 5
* 4 INDEX RANGE SCAN T2_IDX 200 1 70 00:00:00.01 400
* 5 TABLE ACCESS FULL T2 130 1 33 00:00:00.86 33429
-----------------------------------------------------------------------------------------




  

4. Rewrite the SQL into UNION construct.
This time the buffer gets is only 369 due to Oracle is able to take advantage of semi-join:






SQL> select count(*) from (
2 select * from t1 cbo
3 where exists (select null from t2 where cbo.object_id = t2.object_id )
4 union
5 select * from t1 cbo
6 where exists (select null from t2 where cbo.object_name = t2.object_name)
7 );

COUNT(*)
----------
103

SQL>
SQL> select * from table(dbms_xplan.display_cursor(null, null, 'allstats last outline'));

PLAN_TABLE_OUTPUT
---------------------------------------------------------
SQL_ID cmtfjwznktn3c, child number 0
-------------------------------------
select count(*) from ( select * from t1 cbo where exists (select null from t2 where cbo.object_id =
t2.object_id ) union select * from t1 cbo where exists (select null from t2 where cbo.object_name =
t2.object_name) )

Plan hash value: 3775847438

----------------------------------------------------------------------------------------------
Id Operation Name Starts E-Rows A-Rows A-ime Buffers OMem 1Mem Used-Mem
------------------------------------------------------------------------------------------------
1 SORT AGGREGATE 1 1 1 00.08 369
2 VIEW 1 400 103 00.08 369
3 SORT UNIQUE 1 400 103 00.08 369 184321843216384 (0)
4 UNION-ALL 1 173 00.08 369
* 5 HASH JOIN SEMI 1 200 70 00.04 54 762K 762K1184K (0)
6 TABLE ACCESS FULL T1 1 200 200 00.01 5
7 INDEX FAST FULL SCANT2_IDX 1 20000 20000 00.01 49
* 8 HASH JOIN SEMI 1 200 103 00.04 315 762K 762K1187K (0)
9 TABLE ACCESS FULL T1 1 200 200 00.01 5
10 TABLE ACCESS FULL T2 1 20000 20000 00.01 310
------------------------------------------------------------------------------------------------


Outline Data
-------------

/*+
BEGIN_OUTLINE_DATA
IGNORE_OPTIM_EMBEDDED_HINTS
OPTIMIZER_FEATURES_ENABLE('10.2.0.1')
ALL_ROWS
OUTLINE_LEAF(@"SEL$BE5C8E5F")
UNNEST(@"SEL$3")
OUTLINE_LEAF(@"SEL$385088EC")
UNNEST(@"SEL$5")
OUTLINE_LEAF(@"SET$1")
OUTLINE_LEAF(@"SEL$1")
OUTLINE(@"SEL$2")
OUTLINE(@"SEL$3")
OUTLINE(@"SEL$4")
OUTLINE(@"SEL$5")
OUTLINE(@"SET$1")
OUTLINE(@"SEL$1")
NO_ACCESS(@"SEL$1" "from$_subquery$_001"@"SEL$1")
FULL(@"SEL$385088EC" "CBO"@"SEL$4")
FULL(@"SEL$385088EC" "T2"@"SEL$5")
LEADING(@"SEL$385088EC" "CBO"@"SEL$4" "T2"@"SEL$5")
USE_HASH(@"SEL$385088EC" "T2"@"SEL$5")
FULL(@"SEL$BE5C8E5F" "CBO"@"SEL$2")
INDEX_FFS(@"SEL$BE5C8E5F" "T2"@"SEL$3" ("T2"."OBJECT_ID"))
LEADING(@"SEL$BE5C8E5F" "CBO"@"SEL$2" "T2"@"SEL$3")
USE_HASH(@"SEL$BE5C8E5F" "T2"@"SEL$3")
END_OUTLINE_DATA
*/

Predicate Information (identified by operation id):
---------------------------------------------------

5 - access("CBO"."OBJECT_ID"="T2"."OBJECT_ID")
8 - access("CBO"."OBJECT_NAME"="T2"."OBJECT_NAME")

No comments: