From a3611f2e3ae6dd6ef1a66d6eb36ff373b771a410 Mon Sep 17 00:00:00 2001
From: Bryan Cutler
Date: Thu, 13 Jun 2019 16:50:23 -0700
Subject: [PATCH 1/3] increase minimum pandas to 0.23.2

---
 python/pyspark/sql/utils.py | 2 +-
 python/setup.py             | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/python/pyspark/sql/utils.py b/python/pyspark/sql/utils.py
index 1c96e330cebc8..ca5e85bb3a9bb 100644
--- a/python/pyspark/sql/utils.py
+++ b/python/pyspark/sql/utils.py
@@ -131,7 +131,7 @@ def require_minimum_pandas_version():
     """ Raise ImportError if minimum version of Pandas is not installed
     """
     # TODO(HyukjinKwon): Relocate and deduplicate the version specification.
-    minimum_pandas_version = "0.19.2"
+    minimum_pandas_version = "0.23.2"
 
     from distutils.version import LooseVersion
     try:
diff --git a/python/setup.py b/python/setup.py
index e769bf52e7ebb..ee5c32683efae 100644
--- a/python/setup.py
+++ b/python/setup.py
@@ -105,7 +105,7 @@ def _supports_symlinks():
 # If you are changing the versions here, please also change ./python/pyspark/sql/utils.py
 # For Arrow, you should also check ./pom.xml and ensure there are no breaking changes in the
 # binary format protocol with the Java version, see ARROW_HOME/format/* for specifications.
-_minimum_pandas_version = "0.19.2"
+_minimum_pandas_version = "0.23.2"
 _minimum_pyarrow_version = "0.12.1"
 
 try:

From cfaa0a08daf22337f1ccffdaed90a71949d8977e Mon Sep 17 00:00:00 2001
From: Bryan Cutler
Date: Thu, 13 Jun 2019 17:19:18 -0700
Subject: [PATCH 2/3] remove workaround to cast timestamps for pandas 0.19.2

---
 python/pyspark/serializers.py          | 2 --
 python/pyspark/sql/tests/test_arrow.py | 4 ++--
 2 files changed, 2 insertions(+), 4 deletions(-)

diff --git a/python/pyspark/serializers.py b/python/pyspark/serializers.py
index 516ee7e7b3084..fc0828b06a234 100644
--- a/python/pyspark/serializers.py
+++ b/python/pyspark/serializers.py
@@ -297,8 +297,6 @@ def create_array(s, t):
             # Ensure timestamp series are in expected form for Spark internal representation
             if t is not None and pa.types.is_timestamp(t):
                 s = _check_series_convert_timestamps_internal(s.fillna(0), self._timezone)
-                # TODO: need cast after Arrow conversion, ns values cause error with pandas 0.19.2
-                return pa.Array.from_pandas(s, mask=mask).cast(t, safe=False)
             try:
                 array = pa.Array.from_pandas(s, mask=mask, type=t, safe=self._safecheck)
diff --git a/python/pyspark/sql/tests/test_arrow.py b/python/pyspark/sql/tests/test_arrow.py
index cb5124181bef9..067113722adb5 100644
--- a/python/pyspark/sql/tests/test_arrow.py
+++ b/python/pyspark/sql/tests/test_arrow.py
@@ -268,10 +268,10 @@ def test_createDataFrame_with_schema(self):
     def test_createDataFrame_with_incorrect_schema(self):
         pdf = self.create_pandas_data_frame()
         fields = list(self.schema)
-        fields[0], fields[7] = fields[7], fields[0]  # swap str with timestamp
+        fields[0], fields[1] = fields[1], fields[0]  # swap str with int
         wrong_schema = StructType(fields)
         with QuietTest(self.sc):
-            with self.assertRaisesRegexp(Exception, ".*cast.*[s|S]tring.*timestamp.*"):
+            with self.assertRaisesRegexp(Exception, "integer.*required.*got.*str"):
                 self.spark.createDataFrame(pdf, schema=wrong_schema)
 
     def test_createDataFrame_with_names(self):
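
Note (illustrative, not part of the patch series): patch 1 only changes the version string; the gate itself, as the diff context shows, compares versions with `distutils.version.LooseVersion` and raises `ImportError` when the installed pandas is too old. A minimal standalone sketch of that check follows; the helper name `check_pandas` and its error messages are invented for illustration, not Spark's exact implementation.

    # Hypothetical re-creation of the LooseVersion gate in
    # require_minimum_pandas_version(); names and messages are illustrative.
    from distutils.version import LooseVersion

    def check_pandas(minimum_pandas_version="0.23.2"):
        try:
            import pandas
        except ImportError:
            raise ImportError("Pandas >= %s must be installed; it was not found."
                              % minimum_pandas_version)
        if LooseVersion(pandas.__version__) < LooseVersion(minimum_pandas_version):
            raise ImportError("Pandas >= %s must be installed; found %s."
                              % (minimum_pandas_version, pandas.__version__))

    check_pandas()  # with pandas 0.19.2 installed, this now raises ImportError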
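
Note (illustrative, not part of the patch series): patch 2 drops the unconditional `.cast(t, safe=False)` that worked around a nanosecond-handling bug in pandas 0.19.2, so timestamp series now flow through the same `pa.Array.from_pandas(..., type=t, safe=self._safecheck)` path as every other type. A standalone sketch of that conversion, assuming pandas >= 0.23.2 and pyarrow >= 0.12.1; the series values are made up.

    # s, t and mask mirror the serializer's local variables: the pandas
    # Series, the target Arrow type, and the null mask.
    import pandas as pd
    import pyarrow as pa

    s = pd.Series(pd.to_datetime(["2019-06-13 16:50:23", None]))
    t = pa.timestamp("us")
    mask = s.isnull()
    # One conversion with an explicit target type; no follow-up unsafe cast
    # is needed now that the supported pandas versions handle ns values.
    arr = pa.Array.from_pandas(s, mask=mask, type=t)
    print(arr)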
From 13b6ed448d7323f509792184b78df4bdec04d917 Mon Sep 17 00:00:00 2001
From: Bryan Cutler
Date: Fri, 14 Jun 2019 14:25:41 -0700
Subject: [PATCH 3/3] Added note in migration guide

---
 docs/sql-migration-guide-upgrade.md | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/docs/sql-migration-guide-upgrade.md b/docs/sql-migration-guide-upgrade.md
index 44772cc595351..0a25e5b892e01 100644
--- a/docs/sql-migration-guide-upgrade.md
+++ b/docs/sql-migration-guide-upgrade.md
@@ -23,6 +23,10 @@ license: |
 {:toc}
 
 ## Upgrading From Spark SQL 2.4 to 3.0
+  - Since Spark 3.0, PySpark requires a Pandas version of 0.23.2 or higher to use Pandas related functionality, such as `toPandas`, `createDataFrame` from Pandas DataFrame, etc.
+
+  - Since Spark 3.0, PySpark requires a PyArrow version of 0.12.1 or higher to use PyArrow related functionality, such as `pandas_udf`, `toPandas` and `createDataFrame` with "spark.sql.execution.arrow.enabled=true", etc.
+
   - In Spark version 2.4 and earlier, SQL queries such as `FROM <table>` or `FROM <table> UNION ALL FROM <table>` are supported by accident. In hive-style `FROM <table> SELECT <expr>`, the `SELECT` clause is not negligible. Neither Hive nor Presto support this syntax. Therefore we will treat these queries as invalid since Spark 3.0.
 
   - Since Spark 3.0, the Dataset and DataFrame API `unionAll` is not deprecated any more. It is an alias for `union`.
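
Note (illustrative, not part of the patch series): a minimal end-to-end sketch of the functionality the two new migration-guide bullets gate, assuming a local Spark 3.0 build with pandas >= 0.23.2 and pyarrow >= 0.12.1 installed; the example data is made up.

    import pandas as pd
    from pyspark.sql import SparkSession

    spark = (SparkSession.builder
             .master("local[*]")
             .config("spark.sql.execution.arrow.enabled", "true")
             .getOrCreate())

    pdf = pd.DataFrame({"id": [1, 2, 3]})
    # Both directions use the Arrow path, and both fail fast with
    # ImportError if pandas or pyarrow is older than the new minimums.
    df = spark.createDataFrame(pdf)
    print(df.toPandas())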