Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion python/pyspark/sql/connect/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -1828,7 +1828,10 @@ def __dir__(self) -> List[str]:
def collect(self) -> List[Row]:
table, schema = self._to_table()

schema = schema or from_arrow_schema(table.schema, prefer_timestamp_ntz=True)
# not all datatypes are supported in arrow based collect
# here always verify the schema by from_arrow_schema
schema2 = from_arrow_schema(table.schema, prefer_timestamp_ntz=True)
schema = schema or schema2

assert schema is not None and isinstance(schema, StructType)

Expand Down
5 changes: 5 additions & 0 deletions python/pyspark/sql/tests/connect/test_connect_error.py
Original file line number Diff line number Diff line change
Expand Up @@ -225,6 +225,11 @@ def test_select_none(self):
message_parameters={"arg_name": "columns"},
)

def test_ym_interval_in_collect(self):
# YearMonthIntervalType is not supported in python side arrow conversion
with self.assertRaises(PySparkTypeError):
self.connect.sql("SELECT INTERVAL '10-8' YEAR TO MONTH AS interval").first()


if __name__ == "__main__":
from pyspark.sql.tests.connect.test_connect_error import * # noqa: F401
Expand Down