-
Notifications
You must be signed in to change notification settings - Fork 28.9k
[SPARK-22600][SQL] Fix 64kb limit for deeply nested expressions under wholestage codegen #19813
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
34abc22
e0d111e
65d07d5
9f848be
57b1add
d051f9e
6368702
8c7f749
7f00515
777eb7a
7230997
fd87e9b
57a9fb7
0d358d6
aa3db2e
429afba
48add65
9443011
2f4014f
655917c
c083a79
1251dfa
c4f15f7
f35974e
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -55,8 +55,24 @@ import org.apache.spark.util.{ParentClassLoader, Utils} | |
| * to null. | ||
| * @param value A term for a (possibly primitive) value of the result of the evaluation. Not | ||
| * valid if `isNull` is set to `true`. | ||
| * @param inputRow A term that holds the input row name when generating this code. | ||
| * @param inputVars A list of [[ExprInputVar]] that holds input variables when generating this code. | ||
| */ | ||
| case class ExprCode(var code: String, var isNull: String, var value: String) | ||
| case class ExprCode( | ||
| var code: String, | ||
| var isNull: String, | ||
| var value: String, | ||
| var inputRow: String = null, | ||
| var inputVars: Seq[ExprInputVar] = Seq.empty) | ||
|
|
||
| /** | ||
| * Represents an input variable [[ExprCode]] to an evaluation of an [[Expression]]. | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. please add parameter doc There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Added. |
||
| * | ||
| * @param exprCode The [[ExprCode]] that represents the evaluation result for the input variable. | ||
| * @param dataType The data type of the input variable. | ||
| * @param nullable Whether the input variable can be null or not. | ||
| */ | ||
| case class ExprInputVar(exprCode: ExprCode, dataType: DataType, nullable: Boolean) | ||
|
|
||
| /** | ||
| * State used for subexpression elimination. | ||
|
|
@@ -1001,16 +1017,25 @@ class CodegenContext { | |
| commonExprs.foreach { e => | ||
| val expr = e.head | ||
| val fnName = freshName("evalExpr") | ||
| val isNull = s"${fnName}IsNull" | ||
| val isNull = if (expr.nullable) { | ||
| s"${fnName}IsNull" | ||
| } else { | ||
| "" | ||
| } | ||
| val value = s"${fnName}Value" | ||
|
|
||
| // Generate the code for this expression tree and wrap it in a function. | ||
| val eval = expr.genCode(this) | ||
| val assignIsNull = if (expr.nullable) { | ||
| s"$isNull = ${eval.isNull};" | ||
| } else { | ||
| "" | ||
| } | ||
| val fn = | ||
| s""" | ||
| |private void $fnName(InternalRow $INPUT_ROW) { | ||
| | ${eval.code.trim} | ||
| | $isNull = ${eval.isNull}; | ||
| | $assignIsNull | ||
| | $value = ${eval.value}; | ||
| |} | ||
| """.stripMargin | ||
|
|
@@ -1028,12 +1053,17 @@ class CodegenContext { | |
| // 2. Less code. | ||
| // Currently, we will do this for all non-leaf only expression trees (i.e. expr trees with | ||
| // at least two nodes) as the cost of doing it is expected to be low. | ||
| addMutableState(JAVA_BOOLEAN, isNull, s"$isNull = false;") | ||
| addMutableState(javaType(expr.dataType), value, | ||
| s"$value = ${defaultValue(expr.dataType)};") | ||
| if (expr.nullable) { | ||
| addMutableState(JAVA_BOOLEAN, isNull) | ||
| } | ||
| addMutableState(javaType(expr.dataType), value) | ||
|
|
||
| subexprFunctions += s"${addNewFunction(fnName, fn)}($INPUT_ROW);" | ||
| val state = SubExprEliminationState(isNull, value) | ||
| val state = if (expr.nullable) { | ||
| SubExprEliminationState(isNull, value) | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. do we still need it? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think this is still needed. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Because here it is not |
||
| } else { | ||
| SubExprEliminationState("false", value) | ||
| } | ||
| e.foreach(subExprEliminationExprs.put(_, state)) | ||
| } | ||
| } | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
any findings for the more aggressive approach?
val isNull = if (this.nullable) ...Uh oh!
There was an error while loading. Please reload this page.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Compilation error like:
Seems some expressions uses
eval.isNullas lvalue no matter it is nullable or not. Do you think we should find them out and change them?There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
at least we should do it in a new PR. do you wanna do it after this one get merge?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yea, I plan to do that after this.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
BTW after some glance, this may not worth. It's annoying to check
nullableand deal with how to assignisNullinExpressin.doGenCodeThere was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
So, shall we keep it as it is or restore it back?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
oh I mean the more aggressive approach may not worth, we can even simplify some
Expression.doGenCodethat try to setisNulltofalse.