Skip to content
This repository was archived by the owner on Nov 15, 2024. It is now read-only.

Commit 9bf696d

Browse files
kiszkcloud-fan
authored andcommitted
[SPARK-21720][SQL] Fix 64KB JVM bytecode limit problem with AND or OR
## What changes were proposed in this pull request? This PR changes `AND` or `OR` code generation to place condition and then expressions' generated code into separated methods if these size could be large. When the method is newly generated, variables for `isNull` and `value` are declared as an instance variable to pass these values (e.g. `isNull1409` and `value1409`) to the callers of the generated method. This PR resolved two cases: * large code size of left expression * large code size of right expression ## How was this patch tested? Added a new test case into `CodeGenerationSuite` Author: Kazuaki Ishizaki <[email protected]> Closes apache#18972 from kiszk/SPARK-21720.
1 parent b3f9dbf commit 9bf696d

File tree

5 files changed

+157
-29
lines changed

5 files changed

+157
-29
lines changed

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -908,6 +908,36 @@ class CodegenContext {
908908
}
909909
}
910910

911+
/**
912+
* Wrap the generated code of expression, which was created from a row object in INPUT_ROW,
913+
* by a function. ev.isNull and ev.value are passed by global variables
914+
*
915+
* @param ev the code to evaluate expressions.
916+
* @param dataType the data type of ev.value.
917+
* @param baseFuncName the split function name base.
918+
*/
919+
def createAndAddFunction(
920+
ev: ExprCode,
921+
dataType: DataType,
922+
baseFuncName: String): (String, String, String) = {
923+
val globalIsNull = freshName("isNull")
924+
addMutableState("boolean", globalIsNull, s"$globalIsNull = false;")
925+
val globalValue = freshName("value")
926+
addMutableState(javaType(dataType), globalValue,
927+
s"$globalValue = ${defaultValue(dataType)};")
928+
val funcName = freshName(baseFuncName)
929+
val funcBody =
930+
s"""
931+
|private void $funcName(InternalRow ${INPUT_ROW}) {
932+
| ${ev.code.trim}
933+
| $globalIsNull = ${ev.isNull};
934+
| $globalValue = ${ev.value};
935+
|}
936+
""".stripMargin
937+
val fullFuncName = addNewFunction(funcName, funcBody)
938+
(fullFuncName, globalIsNull, globalValue)
939+
}
940+
911941
/**
912942
* Perform a function which generates a sequence of ExprCodes with a given mapping between
913943
* expressions and common expressions, instead of using the mapping in current context.

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/conditionalExpressions.scala

Lines changed: 3 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -72,11 +72,11 @@ case class If(predicate: Expression, trueValue: Expression, falseValue: Expressi
7272
(ctx.INPUT_ROW != null && ctx.currentVars == null)) {
7373

7474
val (condFuncName, condGlobalIsNull, condGlobalValue) =
75-
createAndAddFunction(ctx, condEval, predicate.dataType, "evalIfCondExpr")
75+
ctx.createAndAddFunction(condEval, predicate.dataType, "evalIfCondExpr")
7676
val (trueFuncName, trueGlobalIsNull, trueGlobalValue) =
77-
createAndAddFunction(ctx, trueEval, trueValue.dataType, "evalIfTrueExpr")
77+
ctx.createAndAddFunction(trueEval, trueValue.dataType, "evalIfTrueExpr")
7878
val (falseFuncName, falseGlobalIsNull, falseGlobalValue) =
79-
createAndAddFunction(ctx, falseEval, falseValue.dataType, "evalIfFalseExpr")
79+
ctx.createAndAddFunction(falseEval, falseValue.dataType, "evalIfFalseExpr")
8080
s"""
8181
$condFuncName(${ctx.INPUT_ROW});
8282
boolean ${ev.isNull} = false;
@@ -112,29 +112,6 @@ case class If(predicate: Expression, trueValue: Expression, falseValue: Expressi
112112
ev.copy(code = generatedCode)
113113
}
114114

115-
private def createAndAddFunction(
116-
ctx: CodegenContext,
117-
ev: ExprCode,
118-
dataType: DataType,
119-
baseFuncName: String): (String, String, String) = {
120-
val globalIsNull = ctx.freshName("isNull")
121-
ctx.addMutableState("boolean", globalIsNull, s"$globalIsNull = false;")
122-
val globalValue = ctx.freshName("value")
123-
ctx.addMutableState(ctx.javaType(dataType), globalValue,
124-
s"$globalValue = ${ctx.defaultValue(dataType)};")
125-
val funcName = ctx.freshName(baseFuncName)
126-
val funcBody =
127-
s"""
128-
|private void $funcName(InternalRow ${ctx.INPUT_ROW}) {
129-
| ${ev.code.trim}
130-
| $globalIsNull = ${ev.isNull};
131-
| $globalValue = ${ev.value};
132-
|}
133-
""".stripMargin
134-
val fullFuncName = ctx.addNewFunction(funcName, funcBody)
135-
(fullFuncName, globalIsNull, globalValue)
136-
}
137-
138115
override def toString: String = s"if ($predicate) $trueValue else $falseValue"
139116

140117
override def sql: String = s"(IF(${predicate.sql}, ${trueValue.sql}, ${falseValue.sql}))"

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala

Lines changed: 80 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -368,7 +368,46 @@ case class And(left: Expression, right: Expression) extends BinaryOperator with
368368
val eval2 = right.genCode(ctx)
369369

370370
// The result should be `false`, if any of them is `false` whenever the other is null or not.
371-
if (!left.nullable && !right.nullable) {
371+
372+
// place generated code of eval1 and eval2 in separate methods if their code combined is large
373+
val combinedLength = eval1.code.length + eval2.code.length
374+
if (combinedLength > 1024 &&
375+
// Split these expressions only if they are created from a row object
376+
(ctx.INPUT_ROW != null && ctx.currentVars == null)) {
377+
378+
val (eval1FuncName, eval1GlobalIsNull, eval1GlobalValue) =
379+
ctx.createAndAddFunction(eval1, BooleanType, "eval1Expr")
380+
val (eval2FuncName, eval2GlobalIsNull, eval2GlobalValue) =
381+
ctx.createAndAddFunction(eval2, BooleanType, "eval2Expr")
382+
if (!left.nullable && !right.nullable) {
383+
val generatedCode = s"""
384+
$eval1FuncName(${ctx.INPUT_ROW});
385+
boolean ${ev.value} = false;
386+
if (${eval1GlobalValue}) {
387+
$eval2FuncName(${ctx.INPUT_ROW});
388+
${ev.value} = ${eval2GlobalValue};
389+
}
390+
"""
391+
ev.copy(code = generatedCode, isNull = "false")
392+
} else {
393+
val generatedCode = s"""
394+
$eval1FuncName(${ctx.INPUT_ROW});
395+
boolean ${ev.isNull} = false;
396+
boolean ${ev.value} = false;
397+
if (!${eval1GlobalIsNull} && !${eval1GlobalValue}) {
398+
} else {
399+
$eval2FuncName(${ctx.INPUT_ROW});
400+
if (!${eval2GlobalIsNull} && !${eval2GlobalValue}) {
401+
} else if (!${eval1GlobalIsNull} && !${eval2GlobalIsNull}) {
402+
${ev.value} = true;
403+
} else {
404+
${ev.isNull} = true;
405+
}
406+
}
407+
"""
408+
ev.copy(code = generatedCode)
409+
}
410+
} else if (!left.nullable && !right.nullable) {
372411
ev.copy(code = s"""
373412
${eval1.code}
374413
boolean ${ev.value} = false;
@@ -431,7 +470,46 @@ case class Or(left: Expression, right: Expression) extends BinaryOperator with P
431470
val eval2 = right.genCode(ctx)
432471

433472
// The result should be `true`, if any of them is `true` whenever the other is null or not.
434-
if (!left.nullable && !right.nullable) {
473+
474+
// place generated code of eval1 and eval2 in separate methods if their code combined is large
475+
val combinedLength = eval1.code.length + eval2.code.length
476+
if (combinedLength > 1024 &&
477+
// Split these expressions only if they are created from a row object
478+
(ctx.INPUT_ROW != null && ctx.currentVars == null)) {
479+
480+
val (eval1FuncName, eval1GlobalIsNull, eval1GlobalValue) =
481+
ctx.createAndAddFunction(eval1, BooleanType, "eval1Expr")
482+
val (eval2FuncName, eval2GlobalIsNull, eval2GlobalValue) =
483+
ctx.createAndAddFunction(eval2, BooleanType, "eval2Expr")
484+
if (!left.nullable && !right.nullable) {
485+
val generatedCode = s"""
486+
$eval1FuncName(${ctx.INPUT_ROW});
487+
boolean ${ev.value} = true;
488+
if (!${eval1GlobalValue}) {
489+
$eval2FuncName(${ctx.INPUT_ROW});
490+
${ev.value} = ${eval2GlobalValue};
491+
}
492+
"""
493+
ev.copy(code = generatedCode, isNull = "false")
494+
} else {
495+
val generatedCode = s"""
496+
$eval1FuncName(${ctx.INPUT_ROW});
497+
boolean ${ev.isNull} = false;
498+
boolean ${ev.value} = true;
499+
if (!${eval1GlobalIsNull} && ${eval1GlobalValue}) {
500+
} else {
501+
$eval2FuncName(${ctx.INPUT_ROW});
502+
if (!${eval2GlobalIsNull} && ${eval2GlobalValue}) {
503+
} else if (!${eval1GlobalIsNull} && !${eval2GlobalIsNull}) {
504+
${ev.value} = false;
505+
} else {
506+
${ev.isNull} = true;
507+
}
508+
}
509+
"""
510+
ev.copy(code = generatedCode)
511+
}
512+
} else if (!left.nullable && !right.nullable) {
435513
ev.isNull = "false"
436514
ev.copy(code = s"""
437515
${eval1.code}

sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CodeGenerationSuite.scala

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -341,4 +341,43 @@ class CodeGenerationSuite extends SparkFunSuite with ExpressionEvalHelper {
341341
// should not throw exception
342342
projection(row)
343343
}
344+
345+
test("SPARK-21720: split large predications into blocks due to JVM code size limit") {
346+
val length = 600
347+
348+
val input = new GenericInternalRow(length)
349+
val utf8Str = UTF8String.fromString(s"abc")
350+
for (i <- 0 until length) {
351+
input.update(i, utf8Str)
352+
}
353+
354+
var exprOr: Expression = Literal(false)
355+
for (i <- 0 until length) {
356+
exprOr = Or(EqualTo(BoundReference(i, StringType, true), Literal(s"c$i")), exprOr)
357+
}
358+
359+
val planOr = GenerateMutableProjection.generate(Seq(exprOr))
360+
val actualOr = planOr(input).toSeq(Seq(exprOr.dataType))
361+
assert(actualOr.length == 1)
362+
val expectedOr = false
363+
364+
if (!checkResult(actualOr.head, expectedOr, exprOr.dataType)) {
365+
fail(s"Incorrect Evaluation: expressions: $exprOr, actual: $actualOr, expected: $expectedOr")
366+
}
367+
368+
var exprAnd: Expression = Literal(true)
369+
for (i <- 0 until length) {
370+
exprAnd = And(EqualTo(BoundReference(i, StringType, true), Literal(s"c$i")), exprAnd)
371+
}
372+
373+
val planAnd = GenerateMutableProjection.generate(Seq(exprAnd))
374+
val actualAnd = planAnd(input).toSeq(Seq(exprAnd.dataType))
375+
assert(actualAnd.length == 1)
376+
val expectedAnd = false
377+
378+
if (!checkResult(actualAnd.head, expectedAnd, exprAnd.dataType)) {
379+
fail(
380+
s"Incorrect Evaluation: expressions: $exprAnd, actual: $actualAnd, expected: $expectedAnd")
381+
}
382+
}
344383
}

sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2097,7 +2097,11 @@ class DataFrameSuite extends QueryTest with SharedSQLContext {
20972097
.count
20982098
}
20992099

2100-
testQuietly("SPARK-19372: Filter can be executed w/o generated code due to JVM code size limit") {
2100+
// The fix of SPARK-21720 avoid an exception regarding JVM code size limit
2101+
// TODO: When we make a threshold of splitting statements (1024) configurable,
2102+
// we will re-enable this with max threshold to cause an exception
2103+
// See https://github.com/apache/spark/pull/18972/files#r150223463
2104+
ignore("SPARK-19372: Filter can be executed w/o generated code due to JVM code size limit") {
21012105
val N = 400
21022106
val rows = Seq(Row.fromSeq(Seq.fill(N)("string")))
21032107
val schema = StructType(Seq.tabulate(N)(i => StructField(s"_c$i", StringType)))

0 commit comments

Comments
 (0)