Skip to content

Commit c31d519

Browse files
committed
Disable Arrow safe type check for some tests.
1 parent 5fc35a3 commit c31d519

File tree

1 file changed

+32
-24
lines changed

1 file changed

+32
-24
lines changed

python/pyspark/sql/tests/test_pandas_udf_scalar.py

Lines changed: 32 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -138,36 +138,44 @@ def test_vectorized_udf_null_boolean(self):
138138
self.assertEquals(df.collect(), res.collect())
139139

140140
def test_vectorized_udf_null_byte(self):
141-
data = [(None,), (2,), (3,), (4,)]
142-
schema = StructType().add("byte", ByteType())
143-
df = self.spark.createDataFrame(data, schema)
144-
byte_f = pandas_udf(lambda x: x, ByteType())
145-
res = df.select(byte_f(col('byte')))
146-
self.assertEquals(df.collect(), res.collect())
141+
with self.sql_conf({
142+
"spark.sql.execution.pandas.arrowSafeTypeConversion": False}):
143+
data = [(None,), (2,), (3,), (4,)]
144+
schema = StructType().add("byte", ByteType())
145+
df = self.spark.createDataFrame(data, schema)
146+
byte_f = pandas_udf(lambda x: x, ByteType())
147+
res = df.select(byte_f(col('byte')))
148+
self.assertEquals(df.collect(), res.collect())
147149

148150
def test_vectorized_udf_null_short(self):
149-
data = [(None,), (2,), (3,), (4,)]
150-
schema = StructType().add("short", ShortType())
151-
df = self.spark.createDataFrame(data, schema)
152-
short_f = pandas_udf(lambda x: x, ShortType())
153-
res = df.select(short_f(col('short')))
154-
self.assertEquals(df.collect(), res.collect())
151+
with self.sql_conf({
152+
"spark.sql.execution.pandas.arrowSafeTypeConversion": False}):
153+
data = [(None,), (2,), (3,), (4,)]
154+
schema = StructType().add("short", ShortType())
155+
df = self.spark.createDataFrame(data, schema)
156+
short_f = pandas_udf(lambda x: x, ShortType())
157+
res = df.select(short_f(col('short')))
158+
self.assertEquals(df.collect(), res.collect())
155159

156160
def test_vectorized_udf_null_int(self):
157-
data = [(None,), (2,), (3,), (4,)]
158-
schema = StructType().add("int", IntegerType())
159-
df = self.spark.createDataFrame(data, schema)
160-
int_f = pandas_udf(lambda x: x, IntegerType())
161-
res = df.select(int_f(col('int')))
162-
self.assertEquals(df.collect(), res.collect())
161+
with self.sql_conf({
162+
"spark.sql.execution.pandas.arrowSafeTypeConversion": False}):
163+
data = [(None,), (2,), (3,), (4,)]
164+
schema = StructType().add("int", IntegerType())
165+
df = self.spark.createDataFrame(data, schema)
166+
int_f = pandas_udf(lambda x: x, IntegerType())
167+
res = df.select(int_f(col('int')))
168+
self.assertEquals(df.collect(), res.collect())
163169

164170
def test_vectorized_udf_null_long(self):
165-
data = [(None,), (2,), (3,), (4,)]
166-
schema = StructType().add("long", LongType())
167-
df = self.spark.createDataFrame(data, schema)
168-
long_f = pandas_udf(lambda x: x, LongType())
169-
res = df.select(long_f(col('long')))
170-
self.assertEquals(df.collect(), res.collect())
171+
with self.sql_conf({
172+
"spark.sql.execution.pandas.arrowSafeTypeConversion": False}):
173+
data = [(None,), (2,), (3,), (4,)]
174+
schema = StructType().add("long", LongType())
175+
df = self.spark.createDataFrame(data, schema)
176+
long_f = pandas_udf(lambda x: x, LongType())
177+
res = df.select(long_f(col('long')))
178+
self.assertEquals(df.collect(), res.collect())
171179

172180
def test_vectorized_udf_null_float(self):
173181
data = [(3.0,), (5.0,), (-1.0,), (None,)]

0 commit comments

Comments (0)