From 9798dd4b1246fb50a3b4ec1bc0a9e57860020267 Mon Sep 17 00:00:00 2001 From: Hyukjin Kwon Date: Wed, 23 Jan 2019 13:25:04 +0800 Subject: [PATCH 1/2] Disable dictionary filtering by default in Parquet --- .../datasources/parquet/ParquetFileFormat.scala | 11 +++++++++++ .../datasources/parquet/ParquetQuerySuite.scala | 15 +++++++++++++++ 2 files changed, 26 insertions(+) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFileFormat.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFileFormat.scala index f04502d113acb..b58545b4321f3 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFileFormat.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFileFormat.scala @@ -314,6 +314,17 @@ class ParquetFileFormat SQLConf.CASE_SENSITIVE.key, sparkSession.sessionState.conf.caseSensitiveAnalysis) + // There are three things to note here. + // + // 1. Dictionary filtering has an issue about the predication on null. For this reason, + // This filtering is disabled. See SPARK-26677. + // + // 2. We should disable 'parquet.filter.dictionary.enabled' but + // the 'parquet.filter.stats.enabled' and 'parquet.filter.dictionary.enabled' were + // mistakenly swapped on the Parquet side. Spark should switch to + // 'parquet.filter.dictionary.enabled' once it upgrades Parquet. See PARQUET-1309.
+ hadoopConf.setIfUnset(ParquetInputFormat.STATS_FILTERING_ENABLED, "false") + ParquetWriteSupport.setSchema(requiredSchema, hadoopConf) // Sets flags for `ParquetToSparkSchemaConverter` diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetQuerySuite.scala index ce1dc6e159c61..beb89d91c9266 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetQuerySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetQuerySuite.scala @@ -890,6 +890,21 @@ class ParquetQuerySuite extends QueryTest with ParquetTest with SharedSQLContext } } } + + test("SPARK-26677: negated null-safe equality comparison should not filter matched row groups") { + (true :: false :: Nil).foreach { vectorized => + withSQLConf(SQLConf.PARQUET_VECTORIZED_READER_ENABLED.key -> vectorized.toString) { + withTempPath { path => + // Repeated values for dictionary encoding. 
+ Seq(Some("A"), Some("A"), None).toDF.repartition(1) .write.parquet(path.getAbsolutePath) val df = spark.read.parquet(path.getAbsolutePath) checkAnswer(stripSparkFilter(df.where("NOT (value <=> 'A')")), df) } } } } } object TestingUDT { From eb2eb2bd93394517716df9f47ab650078544e184 Mon Sep 17 00:00:00 2001 From: Hyukjin Kwon Date: Wed, 23 Jan 2019 14:03:35 +0800 Subject: [PATCH 2/2] Fix typo in comments --- .../sql/execution/datasources/parquet/ParquetFileFormat.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFileFormat.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFileFormat.scala index b58545b4321f3..5580dec3ff269 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFileFormat.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFileFormat.scala @@ -314,7 +314,7 @@ class ParquetFileFormat SQLConf.CASE_SENSITIVE.key, sparkSession.sessionState.conf.caseSensitiveAnalysis) - // There are three things to note here. + // There are two things to note here. // // 1. Dictionary filtering has an issue about the predication on null. For this reason, // This filtering is disabled. See SPARK-26677.