From 7c569014c216908852cf1edfc84163893acafc4a Mon Sep 17 00:00:00 2001 From: Ruifeng Zheng Date: Tue, 18 Jun 2024 11:33:12 +0800 Subject: [PATCH] init --- python/pyspark/sql/connect/dataframe.py | 5 ++++- python/pyspark/sql/tests/connect/test_connect_error.py | 5 +++++ 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/python/pyspark/sql/connect/dataframe.py b/python/pyspark/sql/connect/dataframe.py index f2705ec7ad71b..678e66ee2b7b0 100644 --- a/python/pyspark/sql/connect/dataframe.py +++ b/python/pyspark/sql/connect/dataframe.py @@ -1828,7 +1828,10 @@ def __dir__(self) -> List[str]: def collect(self) -> List[Row]: table, schema = self._to_table() - schema = schema or from_arrow_schema(table.schema, prefer_timestamp_ntz=True) + # not all datatypes are supported in arrow based collect + # here always verify the schema by from_arrow_schema + schema2 = from_arrow_schema(table.schema, prefer_timestamp_ntz=True) + schema = schema or schema2 assert schema is not None and isinstance(schema, StructType) diff --git a/python/pyspark/sql/tests/connect/test_connect_error.py b/python/pyspark/sql/tests/connect/test_connect_error.py index 4677f3b84d754..d5d9f9a221847 100644 --- a/python/pyspark/sql/tests/connect/test_connect_error.py +++ b/python/pyspark/sql/tests/connect/test_connect_error.py @@ -225,6 +225,11 @@ def test_select_none(self): message_parameters={"arg_name": "columns"}, ) + def test_ym_interval_in_collect(self): + # YearMonthIntervalType is not supported in python side arrow conversion + with self.assertRaises(PySparkTypeError): + self.connect.sql("SELECT INTERVAL '10-8' YEAR TO MONTH AS interval").first() + if __name__ == "__main__": from pyspark.sql.tests.connect.test_connect_error import * # noqa: F401