Skip to content

Commit 47dd3e1

Browse files
dilipbiswalcmonkey
authored andcommitted
[SPARK-18872][SQL][TESTS] New test cases for EXISTS subquery (Joins + CTE)
## What changes were proposed in this pull request? This PR adds the third and final set of tests for EXISTS subquery. File name | Brief description ------------------------| ----------------- exists-cte.sql |Tests Exist subqueries referencing CTE exists-joins-and-set-ops.sql|Tests Exists subquery used in Joins (Both when joins occurs in outer and suquery blocks) DB2 results are attached here as reference : [exists-cte-db2.txt](https://github.com/apache/spark/files/752091/exists-cte-db2.txt) [exists-joins-and-set-ops-db2.txt](https://github.com/apache/spark/files/753283/exists-joins-and-set-ops-db2.txt) (updated) ## How was this patch tested? The test result is compared with the result run from another SQL engine (in this case is IBM DB2). If the result are equivalent, we assume the result is correct. Author: Dilip Biswal <[email protected]> Closes apache#16802 from dilipbiswal/exists-pr3.
1 parent 7d43b4c commit 47dd3e1

File tree

4 files changed

+932
-0
lines changed

4 files changed

+932
-0
lines changed
Lines changed: 142 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,142 @@
1+
-- Tests EXISTS subquery used along with
2+
-- Common Table Expressions(CTE)
3+
4+
CREATE TEMPORARY VIEW EMP AS SELECT * FROM VALUES
5+
(100, "emp 1", date "2005-01-01", 100.00D, 10),
6+
(100, "emp 1", date "2005-01-01", 100.00D, 10),
7+
(200, "emp 2", date "2003-01-01", 200.00D, 10),
8+
(300, "emp 3", date "2002-01-01", 300.00D, 20),
9+
(400, "emp 4", date "2005-01-01", 400.00D, 30),
10+
(500, "emp 5", date "2001-01-01", 400.00D, NULL),
11+
(600, "emp 6 - no dept", date "2001-01-01", 400.00D, 100),
12+
(700, "emp 7", date "2010-01-01", 400.00D, 100),
13+
(800, "emp 8", date "2016-01-01", 150.00D, 70)
14+
AS EMP(id, emp_name, hiredate, salary, dept_id);
15+
16+
CREATE TEMPORARY VIEW DEPT AS SELECT * FROM VALUES
17+
(10, "dept 1", "CA"),
18+
(20, "dept 2", "NY"),
19+
(30, "dept 3", "TX"),
20+
(40, "dept 4 - unassigned", "OR"),
21+
(50, "dept 5 - unassigned", "NJ"),
22+
(70, "dept 7", "FL")
23+
AS DEPT(dept_id, dept_name, state);
24+
25+
CREATE TEMPORARY VIEW BONUS AS SELECT * FROM VALUES
26+
("emp 1", 10.00D),
27+
("emp 1", 20.00D),
28+
("emp 2", 300.00D),
29+
("emp 2", 100.00D),
30+
("emp 3", 300.00D),
31+
("emp 4", 100.00D),
32+
("emp 5", 1000.00D),
33+
("emp 6 - no dept", 500.00D)
34+
AS BONUS(emp_name, bonus_amt);
35+
36+
-- CTE used inside subquery with correlated condition
37+
-- TC.01.01
38+
WITH bonus_cte
39+
AS (SELECT *
40+
FROM bonus
41+
WHERE EXISTS (SELECT dept.dept_id,
42+
emp.emp_name,
43+
Max(salary),
44+
Count(*)
45+
FROM emp
46+
JOIN dept
47+
ON dept.dept_id = emp.dept_id
48+
WHERE bonus.emp_name = emp.emp_name
49+
GROUP BY dept.dept_id,
50+
emp.emp_name
51+
ORDER BY emp.emp_name))
52+
SELECT *
53+
FROM bonus a
54+
WHERE a.bonus_amt > 30
55+
AND EXISTS (SELECT 1
56+
FROM bonus_cte b
57+
WHERE a.emp_name = b.emp_name);
58+
59+
-- Inner join between two CTEs with correlated condition
60+
-- TC.01.02
61+
WITH emp_cte
62+
AS (SELECT *
63+
FROM emp
64+
WHERE id >= 100
65+
AND id <= 300),
66+
dept_cte
67+
AS (SELECT *
68+
FROM dept
69+
WHERE dept_id = 10)
70+
SELECT *
71+
FROM bonus
72+
WHERE EXISTS (SELECT *
73+
FROM emp_cte a
74+
JOIN dept_cte b
75+
ON a.dept_id = b.dept_id
76+
WHERE bonus.emp_name = a.emp_name);
77+
78+
-- Left outer join between two CTEs with correlated condition
79+
-- TC.01.03
80+
WITH emp_cte
81+
AS (SELECT *
82+
FROM emp
83+
WHERE id >= 100
84+
AND id <= 300),
85+
dept_cte
86+
AS (SELECT *
87+
FROM dept
88+
WHERE dept_id = 10)
89+
SELECT DISTINCT b.emp_name,
90+
b.bonus_amt
91+
FROM bonus b,
92+
emp_cte e,
93+
dept d
94+
WHERE e.dept_id = d.dept_id
95+
AND e.emp_name = b.emp_name
96+
AND EXISTS (SELECT *
97+
FROM emp_cte a
98+
LEFT JOIN dept_cte b
99+
ON a.dept_id = b.dept_id
100+
WHERE e.emp_name = a.emp_name);
101+
102+
-- Joins inside cte and aggregation on cte referenced subquery with correlated condition
103+
-- TC.01.04
104+
WITH empdept
105+
AS (SELECT id,
106+
salary,
107+
emp_name,
108+
dept.dept_id
109+
FROM emp
110+
LEFT JOIN dept
111+
ON emp.dept_id = dept.dept_id
112+
WHERE emp.id IN ( 100, 200 ))
113+
SELECT emp_name,
114+
Sum(bonus_amt)
115+
FROM bonus
116+
WHERE EXISTS (SELECT dept_id,
117+
max(salary)
118+
FROM empdept
119+
GROUP BY dept_id
120+
HAVING count(*) > 1)
121+
GROUP BY emp_name;
122+
123+
-- Using not exists
124+
-- TC.01.05
125+
WITH empdept
126+
AS (SELECT id,
127+
salary,
128+
emp_name,
129+
dept.dept_id
130+
FROM emp
131+
LEFT JOIN dept
132+
ON emp.dept_id = dept.dept_id
133+
WHERE emp.id IN ( 100, 200 ))
134+
SELECT emp_name,
135+
Sum(bonus_amt)
136+
FROM bonus
137+
WHERE NOT EXISTS (SELECT dept_id,
138+
Max(salary)
139+
FROM empdept
140+
GROUP BY dept_id
141+
HAVING count(*) < 1)
142+
GROUP BY emp_name;
Lines changed: 227 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,227 @@
1+
-- Tests EXISTS subquery support. Tests Exists subquery
2+
-- used in Joins (Both when joins occurs in outer and suquery blocks)
3+
4+
CREATE TEMPORARY VIEW EMP AS SELECT * FROM VALUES
5+
(100, "emp 1", date "2005-01-01", 100.00D, 10),
6+
(100, "emp 1", date "2005-01-01", 100.00D, 10),
7+
(200, "emp 2", date "2003-01-01", 200.00D, 10),
8+
(300, "emp 3", date "2002-01-01", 300.00D, 20),
9+
(400, "emp 4", date "2005-01-01", 400.00D, 30),
10+
(500, "emp 5", date "2001-01-01", 400.00D, NULL),
11+
(600, "emp 6 - no dept", date "2001-01-01", 400.00D, 100),
12+
(700, "emp 7", date "2010-01-01", 400.00D, 100),
13+
(800, "emp 8", date "2016-01-01", 150.00D, 70)
14+
AS EMP(id, emp_name, hiredate, salary, dept_id);
15+
16+
CREATE TEMPORARY VIEW DEPT AS SELECT * FROM VALUES
17+
(10, "dept 1", "CA"),
18+
(20, "dept 2", "NY"),
19+
(30, "dept 3", "TX"),
20+
(40, "dept 4 - unassigned", "OR"),
21+
(50, "dept 5 - unassigned", "NJ"),
22+
(70, "dept 7", "FL")
23+
AS DEPT(dept_id, dept_name, state);
24+
25+
CREATE TEMPORARY VIEW BONUS AS SELECT * FROM VALUES
26+
("emp 1", 10.00D),
27+
("emp 1", 20.00D),
28+
("emp 2", 300.00D),
29+
("emp 2", 100.00D),
30+
("emp 3", 300.00D),
31+
("emp 4", 100.00D),
32+
("emp 5", 1000.00D),
33+
("emp 6 - no dept", 500.00D)
34+
AS BONUS(emp_name, bonus_amt);
35+
36+
-- Join in outer query block
37+
-- TC.01.01
38+
SELECT *
39+
FROM emp,
40+
dept
41+
WHERE emp.dept_id = dept.dept_id
42+
AND EXISTS (SELECT *
43+
FROM bonus
44+
WHERE bonus.emp_name = emp.emp_name);
45+
46+
-- Join in outer query block with ON condition
47+
-- TC.01.02
48+
SELECT *
49+
FROM emp
50+
JOIN dept
51+
ON emp.dept_id = dept.dept_id
52+
WHERE EXISTS (SELECT *
53+
FROM bonus
54+
WHERE bonus.emp_name = emp.emp_name);
55+
56+
-- Left join in outer query block with ON condition
57+
-- TC.01.03
58+
SELECT *
59+
FROM emp
60+
LEFT JOIN dept
61+
ON emp.dept_id = dept.dept_id
62+
WHERE EXISTS (SELECT *
63+
FROM bonus
64+
WHERE bonus.emp_name = emp.emp_name);
65+
66+
-- Join in outer query block + NOT EXISTS
67+
-- TC.01.04
68+
SELECT *
69+
FROM emp,
70+
dept
71+
WHERE emp.dept_id = dept.dept_id
72+
AND NOT EXISTS (SELECT *
73+
FROM bonus
74+
WHERE bonus.emp_name = emp.emp_name);
75+
76+
77+
-- inner join in subquery.
78+
-- TC.01.05
79+
SELECT *
80+
FROM bonus
81+
WHERE EXISTS (SELECT *
82+
FROM emp
83+
JOIN dept
84+
ON dept.dept_id = emp.dept_id
85+
WHERE bonus.emp_name = emp.emp_name);
86+
87+
-- right join in subquery
88+
-- TC.01.06
89+
SELECT *
90+
FROM bonus
91+
WHERE EXISTS (SELECT *
92+
FROM emp
93+
RIGHT JOIN dept
94+
ON dept.dept_id = emp.dept_id
95+
WHERE bonus.emp_name = emp.emp_name);
96+
97+
98+
-- Aggregation and join in subquery
99+
-- TC.01.07
100+
SELECT *
101+
FROM bonus
102+
WHERE EXISTS (SELECT dept.dept_id,
103+
emp.emp_name,
104+
Max(salary),
105+
Count(*)
106+
FROM emp
107+
JOIN dept
108+
ON dept.dept_id = emp.dept_id
109+
WHERE bonus.emp_name = emp.emp_name
110+
GROUP BY dept.dept_id,
111+
emp.emp_name
112+
ORDER BY emp.emp_name);
113+
114+
-- Aggregations in outer and subquery + join in subquery
115+
-- TC.01.08
116+
SELECT emp_name,
117+
Sum(bonus_amt)
118+
FROM bonus
119+
WHERE EXISTS (SELECT emp_name,
120+
Max(salary)
121+
FROM emp
122+
JOIN dept
123+
ON dept.dept_id = emp.dept_id
124+
WHERE bonus.emp_name = emp.emp_name
125+
GROUP BY emp_name
126+
HAVING Count(*) > 1
127+
ORDER BY emp_name)
128+
GROUP BY emp_name;
129+
130+
-- TC.01.09
131+
SELECT emp_name,
132+
Sum(bonus_amt)
133+
FROM bonus
134+
WHERE NOT EXISTS (SELECT emp_name,
135+
Max(salary)
136+
FROM emp
137+
JOIN dept
138+
ON dept.dept_id = emp.dept_id
139+
WHERE bonus.emp_name = emp.emp_name
140+
GROUP BY emp_name
141+
HAVING Count(*) > 1
142+
ORDER BY emp_name)
143+
GROUP BY emp_name;
144+
145+
-- Set operations along with EXISTS subquery
146+
-- union
147+
-- TC.02.01
148+
SELECT *
149+
FROM emp
150+
WHERE EXISTS (SELECT *
151+
FROM dept
152+
WHERE dept_id < 30
153+
UNION
154+
SELECT *
155+
FROM dept
156+
WHERE dept_id >= 30
157+
AND dept_id <= 50);
158+
159+
-- intersect
160+
-- TC.02.02
161+
SELECT *
162+
FROM emp
163+
WHERE EXISTS (SELECT *
164+
FROM dept
165+
WHERE dept_id < 30
166+
INTERSECT
167+
SELECT *
168+
FROM dept
169+
WHERE dept_id >= 30
170+
AND dept_id <= 50);
171+
172+
-- intersect + not exists
173+
-- TC.02.03
174+
SELECT *
175+
FROM emp
176+
WHERE NOT EXISTS (SELECT *
177+
FROM dept
178+
WHERE dept_id < 30
179+
INTERSECT
180+
SELECT *
181+
FROM dept
182+
WHERE dept_id >= 30
183+
AND dept_id <= 50);
184+
185+
-- Union all in outer query and except,intersect in subqueries.
186+
-- TC.02.04
187+
SELECT *
188+
FROM emp
189+
WHERE EXISTS (SELECT *
190+
FROM dept
191+
EXCEPT
192+
SELECT *
193+
FROM dept
194+
WHERE dept_id > 50)
195+
UNION ALL
196+
SELECT *
197+
FROM emp
198+
WHERE EXISTS (SELECT *
199+
FROM dept
200+
WHERE dept_id < 30
201+
INTERSECT
202+
SELECT *
203+
FROM dept
204+
WHERE dept_id >= 30
205+
AND dept_id <= 50);
206+
207+
-- Union in outer query and except,intersect in subqueries.
208+
-- TC.02.05
209+
SELECT *
210+
FROM emp
211+
WHERE EXISTS (SELECT *
212+
FROM dept
213+
EXCEPT
214+
SELECT *
215+
FROM dept
216+
WHERE dept_id > 50)
217+
UNION
218+
SELECT *
219+
FROM emp
220+
WHERE EXISTS (SELECT *
221+
FROM dept
222+
WHERE dept_id < 30
223+
INTERSECT
224+
SELECT *
225+
FROM dept
226+
WHERE dept_id >= 30
227+
AND dept_id <= 50);

0 commit comments

Comments
 (0)