-
Notifications
You must be signed in to change notification settings - Fork 28.9k
[SPARK-27619][SQL]MapType should be prohibited in hash expressions #27580
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
211982b
451a622
1eb70f2
06b28e5
068d76f
ab27a0f
5c5ae20
8dfee67
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -32,6 +32,7 @@ import org.apache.spark.sql.catalyst.expressions.codegen._ | |
| import org.apache.spark.sql.catalyst.expressions.codegen.Block._ | ||
| import org.apache.spark.sql.catalyst.util.{ArrayData, MapData} | ||
| import org.apache.spark.sql.catalyst.util.DateTimeConstants._ | ||
| import org.apache.spark.sql.internal.SQLConf | ||
| import org.apache.spark.sql.types._ | ||
| import org.apache.spark.unsafe.Platform | ||
| import org.apache.spark.unsafe.hash.Murmur3_x86_32 | ||
|
|
@@ -232,9 +233,6 @@ case class Crc32(child: Expression) extends UnaryExpression with ImplicitCastInp | |
| * - array: The `result` starts with seed, then use `result` as seed, recursively | ||
| * calculate hash value for each element, and assign the element hash value | ||
| * to `result`. | ||
| * - map: The `result` starts with seed, then use `result` as seed, recursively | ||
| * calculate hash value for each key-value, and assign the key-value hash | ||
| * value to `result`. | ||
| * - struct: The `result` starts with seed, then use `result` as seed, recursively | ||
| * calculate hash value for each field, and assign the field hash value to | ||
| * `result`. | ||
|
|
@@ -249,10 +247,21 @@ abstract class HashExpression[E] extends Expression { | |
|
|
||
| override def nullable: Boolean = false | ||
|
|
||
| private def hasMapType(dt: DataType): Boolean = { | ||
| dt.existsRecursively(_.isInstanceOf[MapType]) | ||
| } | ||
|
|
||
| override def checkInputDataTypes(): TypeCheckResult = { | ||
| if (children.length < 1) { | ||
| TypeCheckResult.TypeCheckFailure( | ||
| s"input to function $prettyName requires at least one argument") | ||
| } else if (children.exists(child => hasMapType(child.dataType)) && | ||
| !SQLConf.get.getConf(SQLConf.LEGACY_USE_HASH_ON_MAPTYPE)) { | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. indentation. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @dongjoon-hyun should i raise a followup for these changes? |
||
| TypeCheckResult.TypeCheckFailure( | ||
| s"input to function $prettyName cannot contain elements of MapType. In Spark, same maps " + | ||
| "may have different hashcode, thus hash expressions are prohibited on MapType " + | ||
| s"elements. To restore previous behavior set ${SQLConf.LEGACY_USE_HASH_ON_MAPTYPE.key} " + | ||
| "to true.") | ||
| } else { | ||
| TypeCheckResult.TypeCheckSuccess | ||
| } | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -2205,6 +2205,12 @@ object SQLConf { | |
| .booleanConf | ||
| .createWithDefault(false) | ||
|
|
||
| val LEGACY_USE_HASH_ON_MAPTYPE = buildConf("spark.sql.legacy.useHashOnMapType") | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Maybe, There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. ah "allow" is more precise. @iRakson feel free to send a followup to address all the comments. |
||
| .doc("When set to true, hash expressions can be applied on elements of MapType. Otherwise, " + | ||
| "an analysis exception will be thrown.") | ||
| .booleanConf | ||
| .createWithDefault(false) | ||
|
|
||
| /** | ||
| * Holds information about keys that have been deprecated. | ||
| * | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
nit.