Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions python/pyspark/sql/udf.py
Original file line number Diff line number Diff line change
Expand Up @@ -376,17 +376,17 @@ def registerJavaFunction(self, name, javaClassName, returnType=None):
>>> spark.udf.registerJavaFunction(
... "javaStringLength", "test.org.apache.spark.sql.JavaStringLength", IntegerType())
>>> spark.sql("SELECT javaStringLength('test')").collect()
[Row(UDF:javaStringLength(test)=4)]
[Row(javaStringLength(test)=4)]

>>> spark.udf.registerJavaFunction(
... "javaStringLength2", "test.org.apache.spark.sql.JavaStringLength")
>>> spark.sql("SELECT javaStringLength2('test')").collect()
[Row(UDF:javaStringLength2(test)=4)]
[Row(javaStringLength2(test)=4)]

>>> spark.udf.registerJavaFunction(
... "javaStringLength3", "test.org.apache.spark.sql.JavaStringLength", "integer")
>>> spark.sql("SELECT javaStringLength3('test')").collect()
[Row(UDF:javaStringLength3(test)=4)]
[Row(javaStringLength3(test)=4)]
"""

jdt = None
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -57,8 +57,7 @@ case class ScalaUDF(

override lazy val deterministic: Boolean = udfDeterministic && children.forall(_.deterministic)

override def toString: String =
s"${udfName.map(name => s"UDF:$name").getOrElse("UDF")}(${children.mkString(", ")})"
override def toString: String = s"${udfName.getOrElse("UDF")}(${children.mkString(", ")})"

// scalastyle:off line.size.limit

Expand Down
272 changes: 272 additions & 0 deletions sql/core/src/test/resources/sql-tests/inputs/udf/pgSQL/udf-case.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,272 @@
--
-- Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group
--
--
-- CASE
-- https://github.com/postgres/postgres/blob/REL_12_BETA1/src/test/regress/sql/case.sql
-- Test the CASE statement
--
-- This test suite contains two Cartesian products without using explicit CROSS JOIN syntax.
-- Thus, we set spark.sql.crossJoin.enabled to true.

-- This test file was converted from pgSQL/case.sql.
-- Note that the currently registered UDF returns a string, so there are some differences, for
-- instance in the string cast performed within the UDF in Scala and Python.

-- Allow joins without a join condition: queries later in this file combine CASE_TBL and
-- CASE2_TBL with the comma syntax instead of an explicit CROSS JOIN.
set spark.sql.crossJoin.enabled=true;
-- Fixture table with an integer key `i` and a double value `f`.
CREATE TABLE CASE_TBL (
i integer,
f double
) USING parquet;

-- Fixture table with two integer columns `i` and `j`.
CREATE TABLE CASE2_TBL (
i integer,
j integer
) USING parquet;

-- Four rows for CASE_TBL; the last row has a NULL `f`.
INSERT INTO CASE_TBL VALUES (1, 10.1);
INSERT INTO CASE_TBL VALUES (2, 20.2);
INSERT INTO CASE_TBL VALUES (3, -30.3);
INSERT INTO CASE_TBL VALUES (4, NULL);

-- Six rows for CASE2_TBL: `i` values repeat (1 and 2 appear twice), and there is one row
-- with a NULL `j` and one row with a NULL `i`.
INSERT INTO CASE2_TBL VALUES (1, -1);
INSERT INTO CASE2_TBL VALUES (2, -2);
INSERT INTO CASE2_TBL VALUES (3, -3);
INSERT INTO CASE2_TBL VALUES (2, -4);
INSERT INTO CASE2_TBL VALUES (1, NULL);
INSERT INTO CASE2_TBL VALUES (NULL, -6);

--
-- Simplest examples without tables
--

-- Single WHEN branch, no ELSE. The comparison is evaluated inside udf() and the result is cast
-- back to boolean (per the file header, the registered UDF returns a string).
SELECT '3' AS `One`,
CASE
WHEN CAST(udf(1 < 2) AS boolean) THEN 3
END AS `Simple WHEN`;

-- The only WHEN condition (1 > 2) is false and there is no ELSE; udf() wraps the THEN value.
SELECT '<NULL>' AS `One`,
CASE
WHEN 1 > 2 THEN udf(3)
END AS `Simple default`;

-- udf() wraps the left side of the comparison and both the THEN and ELSE values.
SELECT '3' AS `One`,
CASE
WHEN udf(1) < 2 THEN udf(3)
ELSE udf(4)
END AS `Simple ELSE`;

-- udf() is applied only to the label column; the CASE expression uses plain literals.
SELECT udf('4') AS `One`,
CASE
WHEN 1 > 2 THEN 3
ELSE 4
END AS `ELSE default`;

-- Two WHEN branches plus ELSE. The first condition is a udf() result cast to boolean, the
-- second compares a udf() result with a literal.
SELECT udf('6') AS `One`,
CASE
WHEN CAST(udf(1 > 2) AS boolean) THEN 3
WHEN udf(4) < 5 THEN 6
ELSE 7
END AS `Two WHEN with default`;

-- Compares rand() with udf(0); there is no ELSE branch.
SELECT '7' AS `None`,
CASE WHEN rand() < udf(0) THEN 1
END AS `NULL on no matches`;

-- Constant-expression folding shouldn't evaluate unreachable subexpressions
SELECT CASE WHEN CAST(udf(1=0) AS boolean) THEN 1/0 WHEN 1=1 THEN 1 ELSE 2/0 END;
SELECT CASE 1 WHEN 0 THEN 1/udf(0) WHEN 1 THEN 1 ELSE 2/0 END;

-- [SPARK-27923] PostgreSQL throws an exception but Spark SQL is NULL
-- However we do not currently suppress folding of potentially
-- reachable subexpressions
SELECT CASE WHEN i > 100 THEN udf(1/0) ELSE udf(0) END FROM case_tbl;

-- Test for cases involving untyped literals in test expression
SELECT CASE 'a' WHEN 'a' THEN udf(1) ELSE udf(2) END;

--
-- Examples of targets involving tables
--

-- NOTE(review): none of the four queries in this section call udf(), unlike the rest of the
-- file; confirm whether that is intentional.

-- CASE with a single WHEN and no ELSE over CASE_TBL.i.
SELECT '' AS `Five`,
CASE
WHEN i >= 3 THEN i
END AS `>= 3 or Null`
FROM CASE_TBL;

-- Returns i + i when i >= 3, otherwise i.
SELECT '' AS `Five`,
CASE WHEN i >= 3 THEN (i + i)
ELSE i
END AS `Simplest Math`
FROM CASE_TBL;

-- Maps i to a string label via a chain of WHEN branches, with 'big' as the fallback.
SELECT '' AS `Five`, i AS `Value`,
CASE WHEN (i < 0) THEN 'small'
WHEN (i = 0) THEN 'zero'
WHEN (i = 1) THEN 'one'
WHEN (i = 2) THEN 'two'
ELSE 'big'
END AS `Category`
FROM CASE_TBL;

-- Same mapping as the previous query, but every condition is written as a redundant
-- `x OR x` disjunction.
SELECT '' AS `Five`,
CASE WHEN ((i < 0) or (i < 0)) THEN 'small'
WHEN ((i = 0) or (i = 0)) THEN 'zero'
WHEN ((i = 1) or (i = 1)) THEN 'one'
WHEN ((i = 2) or (i = 2)) THEN 'two'
ELSE 'big'
END AS `Category`
FROM CASE_TBL;

--
-- Examples of qualifications involving tables
--

--
-- NULLIF() and COALESCE()
-- Shorthand forms for typical CASE constructs
-- defined in the SQL standard.
--

-- Filter CASE_TBL on the udf() of COALESCE(f, i).
SELECT * FROM CASE_TBL WHERE udf(COALESCE(f,i)) = 4;

-- Filter CASE_TBL on the udf() of NULLIF(f, i).
SELECT * FROM CASE_TBL WHERE udf(NULLIF(f,i)) = 2;

-- Comma join of the two fixture tables with no join condition; projects the udf() of a
-- three-argument COALESCE spanning both tables.
SELECT udf(COALESCE(a.f, b.i, b.j))
FROM CASE_TBL a, CASE2_TBL b;

-- Comma join filtered on the udf() of the same three-argument COALESCE.
SELECT *
FROM CASE_TBL a, CASE2_TBL b
WHERE udf(COALESCE(a.f, b.i, b.j)) = 2;

-- Comma join with no join condition; udf() wraps only the label column, and the NULLIF
-- columns are aliased explicitly.
SELECT udf('') AS Five, NULLIF(a.i,b.i) AS `NULLIF(a.i,b.i)`,
NULLIF(b.i, 4) AS `NULLIF(b.i,4)`
FROM CASE_TBL a, CASE2_TBL b;

-- Comma join whose whole filter predicate is evaluated inside udf() and cast back to boolean.
SELECT '' AS `Two`, *
FROM CASE_TBL a, CASE2_TBL b
WHERE CAST(udf(COALESCE(f,b.i) = 2) AS boolean);

-- UPDATE is not supported yet, so the statements below are commented out.
--
-- Examples of updates involving tables
--

-- UPDATE CASE_TBL
-- SET i = CASE WHEN i >= 3 THEN (- i)
-- ELSE (2 * i) END;

-- SELECT * FROM CASE_TBL;

-- UPDATE CASE_TBL
-- SET i = CASE WHEN i >= 2 THEN (2 * i)
-- ELSE (3 * i) END;

-- SELECT * FROM CASE_TBL;

-- UPDATE CASE_TBL
-- SET i = CASE WHEN b.i >= 2 THEN (2 * j)
-- ELSE (3 * j) END
-- FROM CASE2_TBL b
-- WHERE j = -CASE_TBL.i;

-- SELECT * FROM CASE_TBL;

--
-- Nested CASE expressions
--

-- This test exercises a bug caused by aliasing econtext->caseValue_isNull
-- with the isNull argument of the inner CASE's CaseExpr evaluation. After
-- evaluating the vol(null) expression in the inner CASE's second WHEN-clause,
-- the isNull flag for the case test value incorrectly became true, causing
-- the third WHEN-clause not to match. The volatile function calls are needed
-- to prevent constant-folding in the planner, which would hide the bug.

-- Wrap this in a single transaction so the transient '=' operator doesn't
-- cause problems in concurrent sessions
-- BEGIN;

-- CREATE FUNCTION vol(text) returns text as
-- 'begin return $1; end' language plpgsql volatile;

-- Nested CASE: the inner CASE tests vol('bar') against three candidates, and the outer CASE
-- tests the inner result. udf() wraps some of the WHEN values and one THEN value.
-- NOTE(review): vol() is not created in this file (the CREATE FUNCTION above is commented
-- out); presumably it is registered by the test harness -- TODO confirm.
SELECT CASE
(CASE vol('bar')
WHEN udf('foo') THEN 'it was foo!'
WHEN udf(vol(null)) THEN 'null input'
WHEN 'bar' THEN 'it was bar!' END
)
WHEN udf('it was foo!') THEN 'foo recognized'
WHEN 'it was bar!' THEN udf('bar recognized')
ELSE 'unrecognized' END AS col;

-- We don't support the features below:
-- 1. CREATE DOMAIN ...
-- 2. CREATE OPERATOR ...
-- 3. CREATE TYPE ...

-- In this case, we can't inline the SQL function without confusing things.
-- CREATE DOMAIN foodomain AS text;

-- CREATE FUNCTION volfoo(text) returns foodomain as
-- 'begin return $1::foodomain; end' language plpgsql volatile;

-- CREATE FUNCTION inline_eq(foodomain, foodomain) returns boolean as
-- 'SELECT CASE $2::text WHEN $1::text THEN true ELSE false END' language sql;

-- CREATE OPERATOR = (procedure = inline_eq,
-- leftarg = foodomain, rightarg = foodomain);

-- SELECT CASE volfoo('bar') WHEN 'foo'::foodomain THEN 'is foo' ELSE 'is not foo' END;

-- ROLLBACK;

-- Test multiple evaluation of a CASE arg that is a read/write object (#14472)
-- Wrap this in a single transaction so the transient '=' operator doesn't
-- cause problems in concurrent sessions
-- BEGIN;

-- CREATE DOMAIN arrdomain AS int[];

-- CREATE FUNCTION make_ad(int,int) returns arrdomain as
-- 'declare x arrdomain;
-- begin
-- x := array[$1,$2];
-- return x;
-- end' language plpgsql volatile;

-- CREATE FUNCTION ad_eq(arrdomain, arrdomain) returns boolean as
-- 'begin return array_eq($1, $2); end' language plpgsql;

-- CREATE OPERATOR = (procedure = ad_eq,
-- leftarg = arrdomain, rightarg = arrdomain);

-- SELECT CASE make_ad(1,2)
-- WHEN array[2,4]::arrdomain THEN 'wrong'
-- WHEN array[2,5]::arrdomain THEN 'still wrong'
-- WHEN array[1,2]::arrdomain THEN 'right'
-- END;

-- ROLLBACK;

-- Test interaction of CASE with ArrayCoerceExpr (bug #15471)
-- BEGIN;

-- CREATE TYPE casetestenum AS ENUM ('e', 'f', 'g');

-- SELECT
-- CASE 'foo'::text
-- WHEN 'foo' THEN ARRAY['a', 'b', 'c', 'd'] || enum_range(NULL::casetestenum)::text[]
-- ELSE ARRAY['x', 'y']
-- END;

-- ROLLBACK;

--
-- Clean up
--

-- Drop both fixture tables, then set the cross-join flag to false (it was set to true at the
-- top of this file).
DROP TABLE CASE_TBL;
DROP TABLE CASE2_TBL;
set spark.sql.crossJoin.enabled=false;
Original file line number Diff line number Diff line change
Expand Up @@ -396,7 +396,7 @@ SELECT CASE
WHEN 'it was bar!' THEN 'bar recognized'
ELSE 'unrecognized' END
-- !query 33 schema
struct<CASE WHEN (CASE WHEN (UDF:vol(bar) = foo) THEN it was foo! WHEN (UDF:vol(bar) = UDF:vol(null)) THEN null input WHEN (UDF:vol(bar) = bar) THEN it was bar! END = it was foo!) THEN foo recognized WHEN (CASE WHEN (UDF:vol(bar) = foo) THEN it was foo! WHEN (UDF:vol(bar) = UDF:vol(null)) THEN null input WHEN (UDF:vol(bar) = bar) THEN it was bar! END = it was bar!) THEN bar recognized ELSE unrecognized END:string>
struct<CASE WHEN (CASE WHEN (vol(bar) = foo) THEN it was foo! WHEN (vol(bar) = vol(null)) THEN null input WHEN (vol(bar) = bar) THEN it was bar! END = it was foo!) THEN foo recognized WHEN (CASE WHEN (vol(bar) = foo) THEN it was foo! WHEN (vol(bar) = vol(null)) THEN null input WHEN (vol(bar) = bar) THEN it was bar! END = it was bar!) THEN bar recognized ELSE unrecognized END:string>
-- !query 33 output
bar recognized

Expand Down
Loading