
package org.apache.spark.sql.internal

+ import java.io.File
+
import org.apache.hadoop.conf.Configuration
+ import org.apache.hadoop.fs.Path

import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.sql._
@@ -35,6 +38,10 @@ import org.apache.spark.sql.util.ExecutionListenerManager

/**
 * A class that holds all session-specific state in a given [[SparkSession]].
+  * @param sparkContext The [[SparkContext]].
+  * @param sharedState The shared state.
+  * @param conf SQL-specific key-value configurations.
+  * @param experimentalMethods The experimental methods.
 * @param functionRegistry Internal catalog for managing functions registered by the user.
 * @param catalog Internal catalog for managing table and database states.
 * @param sqlParser Parser that extracts expressions, plans, table identifiers etc. from SQL texts.
@@ -55,42 +62,60 @@ private[sql] class SessionState(
    val streamingQueryManager: StreamingQueryManager,
    val queryExecutionCreator: LogicalPlan => QueryExecution) {

+   def newHadoopConf(): Configuration = SessionState.newHadoopConf(
+     sparkContext.hadoopConfiguration,
+     conf)
+
+   def newHadoopConfWithOptions(options: Map[String, String]): Configuration = {
+     val hadoopConf = newHadoopConf()
+     options.foreach { case (k, v) =>
+       if ((v ne null) && k != "path" && k != "paths") {
+         hadoopConf.set(k, v)
+       }
+     }
+     hadoopConf
+   }
+
+   /**
+    * A class for loading resources specified by a function.
+    */
+   val functionResourceLoader: FunctionResourceLoader = {
+     new FunctionResourceLoader {
+       override def loadResource(resource: FunctionResource): Unit = {
+         resource.resourceType match {
+           case JarResource => addJar(resource.uri)
+           case FileResource => sparkContext.addFile(resource.uri)
+           case ArchiveResource =>
+             throw new AnalysisException(
+               "Archive is not allowed to be loaded. If YARN mode is used, " +
+                 "please use --archives options while calling spark-submit.")
+         }
+       }
+     }
+   }
+
96+
  /**
   * Interface exposed to the user for registering user-defined functions.
   * Note that the user-defined functions must be deterministic.
   */
  val udf: UDFRegistration = new UDFRegistration(functionRegistry)

  /**
-    * Logical query plan optimizer.
+    * Logical query plan optimizer.
   */
  val optimizer: Optimizer = new SparkOptimizer(catalog, conf, experimentalMethods)

-   /**
-    * An interface to register custom [[org.apache.spark.sql.util.QueryExecutionListener]]s
-    * that listen for execution metrics.
-    */
-   val listenerManager: ExecutionListenerManager = new ExecutionListenerManager
-
  /**
   * Planner that converts optimized logical plans to physical plans.
   */
  def planner: SparkPlanner =
    new SparkPlanner(sparkContext, conf, experimentalMethods.extraStrategies)

-   def newHadoopConf(): Configuration = SessionState.newHadoopConf(
-     sparkContext.hadoopConfiguration,
-     conf)
-
-   def newHadoopConfWithOptions(options: Map[String, String]): Configuration = {
-     val hadoopConf = newHadoopConf()
-     options.foreach { case (k, v) =>
-       if ((v ne null) && k != "path" && k != "paths") {
-         hadoopConf.set(k, v)
-       }
-     }
-     hadoopConf
-   }
+   /**
+    * An interface to register custom [[org.apache.spark.sql.util.QueryExecutionListener]]s
+    * that listen for execution metrics.
+    */
+   val listenerManager: ExecutionListenerManager = new ExecutionListenerManager

  /**
   * Get an identical copy of the `SessionState` and associate it with the given `SparkSession`
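
Aside: the `newHadoopConfWithOptions` helper moved above merges per-relation options into a copy of the session's Hadoop configuration, skipping the `path`/`paths` keys because they name the data location rather than Hadoop settings. A minimal standalone sketch of that merge, assuming only hadoop-common on the classpath (the option values below are illustrative):

import org.apache.hadoop.conf.Configuration

object HadoopConfOptionsSketch {
  // Copy `base` and apply `options`, dropping null values and the
  // location keys "path"/"paths", mirroring the filter in the diff above.
  def withOptions(base: Configuration, options: Map[String, String]): Configuration = {
    val hadoopConf = new Configuration(base)
    options.foreach { case (k, v) =>
      if ((v ne null) && k != "path" && k != "paths") {
        hadoopConf.set(k, v)
      }
    }
    hadoopConf
  }

  def main(args: Array[String]): Unit = {
    val conf = withOptions(
      new Configuration(),
      Map("fs.defaultFS" -> "file:///", "path" -> "/tmp/data"))
    println(conf.get("fs.defaultFS")) // file:///
    println(conf.get("path"))         // null: filtered out as a location key
  }
}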
@@ -100,7 +125,7 @@ private[sql] class SessionState(
    val confCopy = conf.clone()
    val functionRegistryCopy = functionRegistry.clone()
    val sqlParser: ParserInterface = new SparkSqlParser(confCopy)
-     val catalogCopy = catalog.clone(
+     val catalogCopy = catalog.newSessionCatalogWith(
      confCopy,
      SessionState.newHadoopConf(sparkContext.hadoopConfiguration, confCopy),
      functionRegistryCopy,
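
Aside: the rename to `newSessionCatalogWith` keeps the copy discipline of this method visible: every mutable piece (`conf`, `functionRegistry`, the catalog) is cloned so the forked session can diverge without affecting its parent. A toy illustration of that idea with a hypothetical `Conf` type (not Spark's `SQLConf`):

import scala.collection.mutable

object CloneSketch {
  // Deep-copies its settings map on clone(), so parent and child are independent.
  class Conf(private val settings: mutable.Map[String, String] = mutable.Map.empty) {
    def set(k: String, v: String): Unit = settings(k) = v
    def get(k: String): Option[String] = settings.get(k)
    override def clone(): Conf = new Conf(settings.clone())
  }

  def main(args: Array[String]): Unit = {
    val parent = new Conf()
    parent.set("spark.sql.shuffle.partitions", "200")
    val child = parent.clone()
    child.set("spark.sql.shuffle.partitions", "10")
    println(parent.get("spark.sql.shuffle.partitions")) // Some(200): parent unaffected
  }
}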
@@ -132,7 +157,26 @@ private[sql] class SessionState(
    catalog.refreshTable(sqlParser.parseTableIdentifier(tableName))
  }

-   def addJar(path: String): Unit = sharedState.addJar(path)
+   /**
+    * Add a jar path to [[SparkContext]] and the classloader.
+    *
+    * Note: this method does not seem to access any session state, but the subclass
+    * `HiveSessionState` needs to add the jar to its Hive client for the current session.
+    * Hence, it still needs to live in [[SessionState]].
+    */
+   def addJar(path: String): Unit = {
+     sparkContext.addJar(path)
+     val uri = new Path(path).toUri
+     val jarURL = if (uri.getScheme == null) {
+       // `path` is a local file path without a URL scheme
+       new File(path).toURI.toURL
+     } else {
+       // `path` is a URL with a scheme
+       uri.toURL
+     }
+     sharedState.jarClassLoader.addURL(jarURL)
+     Thread.currentThread().setContextClassLoader(sharedState.jarClassLoader)
+   }
}


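Aside: the scheme check in `addJar` decides whether a path must first be converted to a `file:` URL before it can reach the classloader. A standalone sketch of just that resolution step, assuming hadoop-common for `Path`:

import java.io.File
import java.net.URL
import org.apache.hadoop.fs.Path

object JarUrlSketch {
  // Bare local paths have no URI scheme and go through java.io.File;
  // anything with a scheme is treated as an already-formed URL.
  def toJarURL(path: String): URL = {
    val uri = new Path(path).toUri
    if (uri.getScheme == null) new File(path).toURI.toURL
    else uri.toURL
  }

  def main(args: Array[String]): Unit = {
    println(toJarURL("/tmp/my-udfs.jar"))       // file:/tmp/my-udfs.jar
    println(toJarURL("file:///opt/libs/a.jar")) // scheme present, used as-is
  }
}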
@@ -150,16 +194,11 @@ object SessionState {

    val functionRegistry = FunctionRegistry.builtin.clone()

-     // A class for loading resources specified by a function.
-     val functionResourceLoader: FunctionResourceLoader =
-       createFunctionResourceLoader(sparkContext, sparkSession.sharedState)
-
    val sqlParser: ParserInterface = new SparkSqlParser(sqlConf)

    val catalog = new SessionCatalog(
      sparkSession.sharedState.externalCatalog,
      sparkSession.sharedState.globalTempViewManager,
-       functionResourceLoader,
      functionRegistry,
      sqlConf,
      newHadoopConf(sparkContext.hadoopConfiguration, sqlConf),
@@ -171,7 +210,7 @@ object SessionState {

    val queryExecutionCreator = (plan: LogicalPlan) => new QueryExecution(sparkSession, plan)

-     new SessionState(
+     val sessionState = new SessionState(
      sparkContext,
      sparkSession.sharedState,
      sqlConf,
@@ -182,23 +221,11 @@ object SessionState {
      analyzer,
      streamingQueryManager,
      queryExecutionCreator)
-   }
-
-   def createFunctionResourceLoader(
-       sparkContext: SparkContext,
-       sharedState: SharedState): FunctionResourceLoader = {
-     new FunctionResourceLoader {
-       override def loadResource(resource: FunctionResource): Unit = {
-         resource.resourceType match {
-           case JarResource => sharedState.addJar(resource.uri)
-           case FileResource => sparkContext.addFile(resource.uri)
-           case ArchiveResource =>
-             throw new AnalysisException(
-               "Archive is not allowed to be loaded. If YARN mode is used, " +
-                 "please use --archives options while calling spark-submit.")
-         }
-       }
-     }
+     // functionResourceLoader needs to access SessionState.addJar, so it cannot be created before
+     // creating SessionState. Setting `catalog.functionResourceLoader` here is safe since the caller
+     // cannot use SessionCatalog before we return SessionState.
+     catalog.functionResourceLoader = sessionState.functionResourceLoader
+     sessionState
  }

  def newHadoopConf(hadoopConf: Configuration, sqlConf: SQLConf): Configuration = {
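
Aside: the comment block added above describes breaking a construction cycle: the loader needs `SessionState.addJar`, but the catalog is built before the `SessionState` exists, so the loader is assigned to the catalog immediately after construction, before the catalog can be used. A toy illustration of the pattern with hypothetical stand-in types (not Spark code):

object LoaderWiringSketch {
  class Catalog {
    // Mutable slot, filled in right after the session is constructed.
    var resourceLoader: String => Unit = _
  }

  class Session(val catalog: Catalog) {
    def addJar(path: String): Unit = println(s"adding jar $path")
    // Needs `this`, so it cannot exist before the session does.
    val resourceLoader: String => Unit = uri => addJar(uri)
  }

  def createSession(): Session = {
    val catalog = new Catalog                        // built without a loader...
    val session = new Session(catalog)
    catalog.resourceLoader = session.resourceLoader  // ...wired before anyone can use it
    session
  }

  def main(args: Array[String]): Unit =
    createSession().catalog.resourceLoader("/tmp/udfs.jar")
}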