
Commit 529a1d3

JoshRosen authored and marmbrus committed
[SPARK-6152] Use shaded ASM5 to support closure cleaning of Java 8 compiled classes
This patch modifies Spark's closure cleaner (and a few other places) to use ASM 5, which is necessary in order to support cleaning of closures that were compiled by Java 8.

In order to avoid ASM dependency conflicts, Spark excludes ASM from all of its dependencies and uses a shaded version of ASM 4 that comes from `reflectasm` (see [SPARK-782](https://issues.apache.org/jira/browse/SPARK-782) and #232). This patch updates Spark to use a shaded version of ASM 5.0.4 published by the Apache XBean project; the POM used to create the shaded artifact can be found at https://github.com/apache/geronimo-xbean/blob/xbean-4.4/xbean-asm5-shaded/pom.xml.

http://movingfulcrum.tumblr.com/post/80826553604/asm-framework-50-the-missing-migration-guide was a useful resource while upgrading the code to the new ASM 5 opcodes.

I also added a new regression test in the `java8-tests` subproject; the existing tests were insufficient to catch this bug, which only affected Scala 2.11 user code compiled targeting Java 8.

Author: Josh Rosen <[email protected]>

Closes #9512 from JoshRosen/SPARK-6152.
1 parent e71ba56 commit 529a1d3
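As background for the diffs below, here is a minimal sketch (not part of the commit) of driving the shaded ASM 5 API that this patch switches to. The imports mirror the ones introduced in ClosureCleaner.scala; the `MethodLister` visitor and `listMethods` helper are hypothetical illustrations.

```scala
import org.apache.xbean.asm5.{ClassReader, ClassVisitor, MethodVisitor}
import org.apache.xbean.asm5.Opcodes._

// A trivial ASM 5 visitor. The ASM 4 ClassReader rejects Java 8 class files
// (class file major version 52), which is the failure mode SPARK-6152 fixes.
class MethodLister extends ClassVisitor(ASM5) {
  override def visitMethod(access: Int, name: String, desc: String,
      sig: String, exceptions: Array[String]): MethodVisitor = {
    println(s"method: $name$desc")
    null // no need to visit instruction-level details for this sketch
  }
}

// Hypothetical helper: feed the raw bytes of a compiled closure class to ASM.
def listMethods(classBytes: Array[Byte]): Unit = {
  new ClassReader(classBytes).accept(new MethodLister, ClassReader.SKIP_DEBUG)
}
```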

File tree

12 files changed: 125 additions & 57 deletions


core/pom.xml

Lines changed: 4 additions & 0 deletions
@@ -51,6 +51,10 @@
       <groupId>com.twitter</groupId>
       <artifactId>chill-java</artifactId>
     </dependency>
+    <dependency>
+      <groupId>org.apache.xbean</groupId>
+      <artifactId>xbean-asm5-shaded</artifactId>
+    </dependency>
     <dependency>
       <groupId>org.apache.hadoop</groupId>
       <artifactId>hadoop-client</artifactId>

core/src/main/scala/org/apache/spark/util/ClosureCleaner.scala

Lines changed: 13 additions & 12 deletions
@@ -21,8 +21,8 @@ import java.io.{ByteArrayInputStream, ByteArrayOutputStream}
 
 import scala.collection.mutable.{Map, Set}
 
-import com.esotericsoftware.reflectasm.shaded.org.objectweb.asm.{ClassReader, ClassVisitor, MethodVisitor, Type}
-import com.esotericsoftware.reflectasm.shaded.org.objectweb.asm.Opcodes._
+import org.apache.xbean.asm5.{ClassReader, ClassVisitor, MethodVisitor, Type}
+import org.apache.xbean.asm5.Opcodes._
 
 import org.apache.spark.{Logging, SparkEnv, SparkException}
 
@@ -325,19 +325,19 @@ private[spark] object ClosureCleaner extends Logging {
 private[spark] class ReturnStatementInClosureException
   extends SparkException("Return statements aren't allowed in Spark closures")
 
-private class ReturnStatementFinder extends ClassVisitor(ASM4) {
+private class ReturnStatementFinder extends ClassVisitor(ASM5) {
   override def visitMethod(access: Int, name: String, desc: String,
       sig: String, exceptions: Array[String]): MethodVisitor = {
     if (name.contains("apply")) {
-      new MethodVisitor(ASM4) {
+      new MethodVisitor(ASM5) {
         override def visitTypeInsn(op: Int, tp: String) {
           if (op == NEW && tp.contains("scala/runtime/NonLocalReturnControl")) {
             throw new ReturnStatementInClosureException
           }
         }
       }
     } else {
-      new MethodVisitor(ASM4) {}
+      new MethodVisitor(ASM5) {}
     }
   }
 }
@@ -361,7 +361,7 @@ private[util] class FieldAccessFinder(
     findTransitively: Boolean,
     specificMethod: Option[MethodIdentifier[_]] = None,
     visitedMethods: Set[MethodIdentifier[_]] = Set.empty)
-  extends ClassVisitor(ASM4) {
+  extends ClassVisitor(ASM5) {
 
   override def visitMethod(
       access: Int,
@@ -376,7 +376,7 @@ private[util] class FieldAccessFinder(
       return null
     }
 
-    new MethodVisitor(ASM4) {
+    new MethodVisitor(ASM5) {
       override def visitFieldInsn(op: Int, owner: String, name: String, desc: String) {
         if (op == GETFIELD) {
           for (cl <- fields.keys if cl.getName == owner.replace('/', '.')) {
@@ -385,7 +385,8 @@ private[util] class FieldAccessFinder(
         }
       }
 
-      override def visitMethodInsn(op: Int, owner: String, name: String, desc: String) {
+      override def visitMethodInsn(
+          op: Int, owner: String, name: String, desc: String, itf: Boolean) {
         for (cl <- fields.keys if cl.getName == owner.replace('/', '.')) {
           // Check for calls a getter method for a variable in an interpreter wrapper object.
           // This means that the corresponding field will be accessed, so we should save it.
@@ -408,7 +409,7 @@ private[util] class FieldAccessFinder(
   }
 }
 
-private class InnerClosureFinder(output: Set[Class[_]]) extends ClassVisitor(ASM4) {
+private class InnerClosureFinder(output: Set[Class[_]]) extends ClassVisitor(ASM5) {
   var myName: String = null
 
   // TODO: Recursively find inner closures that we indirectly reference, e.g.
@@ -423,9 +424,9 @@ private class InnerClosureFinder(output: Set[Class[_]]) extends ClassVisitor(ASM
 
   override def visitMethod(access: Int, name: String, desc: String,
       sig: String, exceptions: Array[String]): MethodVisitor = {
-    new MethodVisitor(ASM4) {
-      override def visitMethodInsn(op: Int, owner: String, name: String,
-          desc: String) {
+    new MethodVisitor(ASM5) {
+      override def visitMethodInsn(
+          op: Int, owner: String, name: String, desc: String, itf: Boolean) {
         val argTypes = Type.getArgumentTypes(desc)
         if (op == INVOKESPECIAL && name == "<init>" && argTypes.length > 0
           && argTypes(0).toString.startsWith("L") // is it an object?
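The `visitMethodInsn` signature changes above are not cosmetic: with the ASM 5 API, the class reader delivers invocation instructions through the five-argument overload (the extra boolean marks calls whose owner is an interface, e.g. Java 8 default methods), so a visitor constructed with `ASM5` that only overrides the old four-argument form would no longer see those calls. A minimal sketch of the new override, with a hypothetical logging body:

```scala
import org.apache.xbean.asm5.MethodVisitor
import org.apache.xbean.asm5.Opcodes._

// Sketch only: logs every invocation instruction it visits. FieldAccessFinder and
// InnerClosureFinder above follow the same pattern but record fields and classes instead.
class CallLogger extends MethodVisitor(ASM5) {
  override def visitMethodInsn(
      op: Int, owner: String, name: String, desc: String, itf: Boolean) {
    // `itf` is true when the owner is an interface; ASM 4's overload could not express this.
    println(s"invoke: $owner.$name$desc (interface owner: $itf)")
  }
}
```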

docs/building-spark.md

Lines changed: 4 additions & 0 deletions
@@ -190,6 +190,10 @@ Running only Java 8 tests and nothing else.
 
     mvn install -DskipTests -Pjava8-tests
 
+or
+
+    sbt -Pjava8-tests java8-tests/test
+
 Java 8 tests are run when `-Pjava8-tests` profile is enabled, they will run in spite of `-DskipTests`.
 For these tests to run your system must have a JDK 8 installation.
 If you have JDK 8 installed but it is not the system default, you can set JAVA_HOME to point to JDK 8 before running the tests.
JDK8ScalaSuite.scala (new file in the `java8-tests` subproject)

Lines changed: 27 additions & 0 deletions
@@ -0,0 +1,27 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark
+
+/**
+ * Test cases where JDK8-compiled Scala user code is used with Spark.
+ */
+class JDK8ScalaSuite extends SparkFunSuite with SharedSparkContext {
+  test("basic RDD closure test (SPARK-6152)") {
+    sc.parallelize(1 to 1000).map(x => x * x).count()
+  }
+}
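This suite only exercises the bug when the build itself targets Java 8 bytecode (hence the sbt changes below); a hedged sketch of how the failure used to surface, assuming a Scala 2.11 build compiled with `-target:jvm-1.8`:

```scala
// Building the RDD pipeline is what invokes ClosureCleaner.clean on the user function,
// so before this patch the `map` call below failed inside ASM's ClassReader (an
// IllegalArgumentException for the unsupported class file version) on Java 8-compiled
// closures, before any job was even submitted.
val squares = sc.parallelize(1 to 1000).map(x => x * x) // closure cleaning happens here
assert(squares.count() == 1000)
```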

graphx/pom.xml

Lines changed: 4 additions & 0 deletions
@@ -47,6 +47,10 @@
       <type>test-jar</type>
       <scope>test</scope>
     </dependency>
+    <dependency>
+      <groupId>org.apache.xbean</groupId>
+      <artifactId>xbean-asm5-shaded</artifactId>
+    </dependency>
     <dependency>
       <groupId>com.google.guava</groupId>
       <artifactId>guava</artifactId>

graphx/src/main/scala/org/apache/spark/graphx/util/BytecodeUtils.scala

Lines changed: 8 additions & 8 deletions
@@ -22,11 +22,10 @@ import java.io.{ByteArrayInputStream, ByteArrayOutputStream}
 import scala.collection.mutable.HashSet
 import scala.language.existentials
 
-import org.apache.spark.util.Utils
-
-import com.esotericsoftware.reflectasm.shaded.org.objectweb.asm.{ClassReader, ClassVisitor, MethodVisitor}
-import com.esotericsoftware.reflectasm.shaded.org.objectweb.asm.Opcodes._
+import org.apache.xbean.asm5.{ClassReader, ClassVisitor, MethodVisitor}
+import org.apache.xbean.asm5.Opcodes._
 
+import org.apache.spark.util.Utils
 
 /**
  * Includes an utility function to test whether a function accesses a specific attribute
@@ -107,18 +106,19 @@ private[graphx] object BytecodeUtils {
    * MethodInvocationFinder("spark/graph/Foo", "test")
    * its methodsInvoked variable will contain the set of methods invoked directly by
    * Foo.test(). Interface invocations are not returned as part of the result set because we cannot
-   * determine the actual metod invoked by inspecting the bytecode.
+   * determine the actual method invoked by inspecting the bytecode.
    */
   private class MethodInvocationFinder(className: String, methodName: String)
-    extends ClassVisitor(ASM4) {
+    extends ClassVisitor(ASM5) {
 
     val methodsInvoked = new HashSet[(Class[_], String)]
 
     override def visitMethod(access: Int, name: String, desc: String,
         sig: String, exceptions: Array[String]): MethodVisitor = {
       if (name == methodName) {
-        new MethodVisitor(ASM4) {
-          override def visitMethodInsn(op: Int, owner: String, name: String, desc: String) {
+        new MethodVisitor(ASM5) {
+          override def visitMethodInsn(
+              op: Int, owner: String, name: String, desc: String, itf: Boolean) {
             if (op == INVOKEVIRTUAL || op == INVOKESPECIAL || op == INVOKESTATIC) {
               if (!skipClass(owner)) {
                 methodsInvoked.add((Utils.classForName(owner.replace("/", ".")), name))

pom.xml

Lines changed: 8 additions & 0 deletions
@@ -393,6 +393,14 @@
         </exclusion>
       </exclusions>
     </dependency>
+    <!-- This artifact is a shaded version of ASM 5.0.4. The POM that was used to produce this
+         is at https://github.com/apache/geronimo-xbean/tree/xbean-4.4/xbean-asm5-shaded
+         For context on why we shade ASM, see SPARK-782 and SPARK-6152. -->
+    <dependency>
+      <groupId>org.apache.xbean</groupId>
+      <artifactId>xbean-asm5-shaded</artifactId>
+      <version>4.4</version>
+    </dependency>
 
     <!-- Shaded deps marked as provided. These are promoted to compile scope
          in the modules where we want the shaded classes to appear in the

project/SparkBuild.scala

Lines changed: 24 additions & 2 deletions
@@ -57,6 +57,9 @@ object BuildCommons {
   val sparkHome = buildLocation
 
   val testTempDir = s"$sparkHome/target/tmp"
+
+  val javacJVMVersion = settingKey[String]("source and target JVM version for javac")
+  val scalacJVMVersion = settingKey[String]("source and target JVM version for scalac")
 }
 
 object SparkBuild extends PomBuild {
@@ -154,9 +157,17 @@ object SparkBuild extends PomBuild {
       if (major.toInt >= 1 && minor.toInt >= 8) Seq("-Xdoclint:all", "-Xdoclint:-missing") else Seq.empty
     },
 
-    javacOptions in Compile ++= Seq("-encoding", "UTF-8"),
+    javacJVMVersion := "1.7",
+    scalacJVMVersion := "1.7",
+
+    javacOptions in Compile ++= Seq(
+      "-encoding", "UTF-8",
+      "-source", javacJVMVersion.value,
+      "-target", javacJVMVersion.value
+    ),
 
     scalacOptions in Compile ++= Seq(
+      s"-target:jvm-${scalacJVMVersion.value}",
      "-sourcepath", (baseDirectory in ThisBuild).value.getAbsolutePath // Required for relative source links in scaladoc
    ),
 
@@ -241,8 +252,9 @@ object SparkBuild extends PomBuild {
 
   enable(Flume.settings)(streamingFlumeSink)
 
-  enable(DockerIntegrationTests.settings)(dockerIntegrationTests)
+  enable(Java8TestSettings.settings)(java8Tests)
 
+  enable(DockerIntegrationTests.settings)(dockerIntegrationTests)
 
   /**
   * Adds the ability to run the spark shell directly from SBT without building an assembly
@@ -591,6 +603,16 @@ object Unidoc {
   )
 }
 
+object Java8TestSettings {
+  import BuildCommons._
+
+  lazy val settings = Seq(
+    javacJVMVersion := "1.8",
+    // Targeting Java 8 bytecode is only supported in Scala 2.11.4 and higher:
+    scalacJVMVersion := (if (System.getProperty("scala-2.11") == "true") "1.8" else "1.7")
+  )
+}
+
 object TestSettings {
   import BuildCommons._
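To make the effect of the two new keys concrete, here is a hedged sketch of the compiler flags they expand to once `Java8TestSettings` is enabled on a module and the build is run with `-Dscala-2.11=true` (values taken from the settings above; the sbt `show` commands assume the module id is `java8-tests`):

```scala
// Roughly what the shared javacOptions / scalacOptions definitions above resolve to
// for a module that has Java8TestSettings enabled:
val effectiveJavacOptions = Seq("-encoding", "UTF-8", "-source", "1.8", "-target", "1.8")
val effectiveScalacTarget = "-target:jvm-1.8"
// The real values can be inspected from the sbt shell:
//   show java8-tests/compile:javacOptions
//   show java8-tests/compile:scalacOptions
```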

repl/pom.xml

Lines changed: 4 additions & 0 deletions
@@ -95,6 +95,10 @@
       <groupId>org.apache.spark</groupId>
       <artifactId>spark-test-tags_${scala.binary.version}</artifactId>
     </dependency>
+    <dependency>
+      <groupId>org.apache.xbean</groupId>
+      <artifactId>xbean-asm5-shaded</artifactId>
+    </dependency>
 
     <!-- Explicit listing of transitive deps that are shaded. Otherwise, odd compiler crashes. -->
     <dependency>

repl/src/main/scala/org/apache/spark/repl/ExecutorClassLoader.scala

Lines changed: 4 additions & 5 deletions
@@ -23,15 +23,14 @@ import java.net.{HttpURLConnection, URI, URL, URLEncoder}
 import scala.util.control.NonFatal
 
 import org.apache.hadoop.fs.{FileSystem, Path}
+import org.apache.xbean.asm5._
+import org.apache.xbean.asm5.Opcodes._
 
 import org.apache.spark.{SparkConf, SparkEnv, Logging}
 import org.apache.spark.deploy.SparkHadoopUtil
 import org.apache.spark.util.Utils
 import org.apache.spark.util.ParentClassLoader
 
-import com.esotericsoftware.reflectasm.shaded.org.objectweb.asm._
-import com.esotericsoftware.reflectasm.shaded.org.objectweb.asm.Opcodes._
-
 /**
  * A ClassLoader that reads classes from a Hadoop FileSystem or HTTP URI,
  * used to load classes defined by the interpreter when the REPL is used.
@@ -192,7 +191,7 @@ class ExecutorClassLoader(conf: SparkConf, classUri: String, parent: ClassLoader
 }
 
 class ConstructorCleaner(className: String, cv: ClassVisitor)
-extends ClassVisitor(ASM4, cv) {
+extends ClassVisitor(ASM5, cv) {
   override def visitMethod(access: Int, name: String, desc: String,
       sig: String, exceptions: Array[String]): MethodVisitor = {
     val mv = cv.visitMethod(access, name, desc, sig, exceptions)
@@ -202,7 +201,7 @@ extends ClassVisitor(ASM4, cv) {
       // field in the class to point to it, but do nothing otherwise.
       mv.visitCode()
       mv.visitVarInsn(ALOAD, 0) // load this
-      mv.visitMethodInsn(INVOKESPECIAL, "java/lang/Object", "<init>", "()V")
+      mv.visitMethodInsn(INVOKESPECIAL, "java/lang/Object", "<init>", "()V", false)
       mv.visitVarInsn(ALOAD, 0) // load this
       // val classType = className.replace('.', '/')
       // mv.visitFieldInsn(PUTSTATIC, classType, "MODULE$", "L" + classType + ";")
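Unlike the visitors in ClosureCleaner, `ConstructorCleaner` sits on the code-generation side of ASM (it forwards to the underlying `ClassVisitor` `cv` while rewriting constructors), so the five-argument `visitMethodInsn` appears here as an emit call; the trailing `false` says that `java/lang/Object` is a class rather than an interface. A hedged sketch of emitting a minimal constructor with the ASM 5 call:

```scala
import org.apache.xbean.asm5.{ClassWriter, MethodVisitor}
import org.apache.xbean.asm5.Opcodes._

// Sketch only: emit an empty no-arg constructor that just invokes Object.<init>.
def emitEmptyInit(cw: ClassWriter): Unit = {
  val mv: MethodVisitor = cw.visitMethod(ACC_PUBLIC, "<init>", "()V", null, null)
  mv.visitCode()
  mv.visitVarInsn(ALOAD, 0) // push `this`
  mv.visitMethodInsn(INVOKESPECIAL, "java/lang/Object", "<init>", "()V", false) // owner is a class, not an interface
  mv.visitInsn(RETURN)
  mv.visitMaxs(1, 1) // max stack depth 1, one local slot for `this`
  mv.visitEnd()
}
```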
