Skip to content

Commit 2c2062e

Browse files
yaooqinn authored and cloud-fan committed
[SPARK-31498][SQL][DOCS] Dump public static sql configurations through doc generation
### What changes were proposed in this pull request? Currently, only the non-static public SQL configurations are dumped to the public doc; we'd better also add the static public ones, as the command `set -v` does. This PR forces a call to StaticSQLConf to build the static configuration entries. ### Why are the changes needed? Fix missing SQL configurations in the doc. ### Does this PR introduce any user-facing change? No. ### How was this patch tested? Added a unit test and verified locally that the public static SQL configs appear in `docs/sql-config.html`. Closes #28274 from yaooqinn/SPARK-31498. Authored-by: Kent Yao <[email protected]> Signed-off-by: Wenchen Fan <[email protected]>
1 parent cf60384 commit 2c2062e

File tree

5 files changed

+119
-13
lines changed

5 files changed

+119
-13
lines changed

docs/configuration.md

Lines changed: 24 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2622,11 +2622,32 @@ Please refer to the [Security](security.html) page for available options on how
26222622
Spark subsystems.
26232623

26242624

2625-
{% for static_file in site.static_files %}
2626-
{% if static_file.name == 'generated-sql-configuration-table.html' %}
26272625
### Spark SQL
26282626

2629-
{% include_relative generated-sql-configuration-table.html %}
2627+
#### Runtime SQL Configuration
2628+
2629+
Runtime SQL configurations are per-session, mutable Spark SQL configurations. They can be set with initial values by the config file
2630+
and command-line options with `--conf/-c` prefixed, or by setting `SparkConf` that are used to create `SparkSession`.
2631+
Also, they can be set and queried by SET commands and reset to their initial values by RESET command,
2632+
or by `SparkSession.conf`'s setter and getter methods in runtime.
2633+
2634+
{% for static_file in site.static_files %}
2635+
{% if static_file.name == 'generated-runtime-sql-config-table.html' %}
2636+
{% include_relative generated-runtime-sql-config-table.html %}
2637+
{% break %}
2638+
{% endif %}
2639+
{% endfor %}
2640+
2641+
2642+
#### Static SQL Configuration
2643+
2644+
Static SQL configurations are cross-session, immutable Spark SQL configurations. They can be set with final values by the config file
2645+
and command-line options with `--conf/-c` prefixed, or by setting `SparkConf` that are used to create `SparkSession`.
2646+
External users can query the static sql config values via `SparkSession.conf` or via set command, e.g. `SET spark.sql.extensions;`, but cannot set/unset them.
2647+
2648+
{% for static_file in site.static_files %}
2649+
{% if static_file.name == 'generated-static-sql-config-table.html' %}
2650+
{% include_relative generated-static-sql-config-table.html %}
26302651
{% break %}
26312652
{% endif %}
26322653
{% endfor %}

sql/core/src/main/scala/org/apache/spark/sql/api/python/PythonSQLUtils.scala

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -23,13 +23,13 @@ import java.nio.channels.Channels
2323
import org.apache.spark.api.java.JavaRDD
2424
import org.apache.spark.api.python.PythonRDDServer
2525
import org.apache.spark.rdd.RDD
26-
import org.apache.spark.sql.{DataFrame, Dataset, SQLContext}
26+
import org.apache.spark.sql.{DataFrame, SQLContext}
2727
import org.apache.spark.sql.catalyst.analysis.FunctionRegistry
2828
import org.apache.spark.sql.catalyst.expressions.ExpressionInfo
2929
import org.apache.spark.sql.catalyst.parser.CatalystSqlParser
3030
import org.apache.spark.sql.execution.{ExplainMode, QueryExecution}
3131
import org.apache.spark.sql.execution.arrow.ArrowConverters
32-
import org.apache.spark.sql.internal.SQLConf
32+
import org.apache.spark.sql.internal.{SQLConf, StaticSQLConf}
3333
import org.apache.spark.sql.types.DataType
3434

3535
private[sql] object PythonSQLUtils {
@@ -43,7 +43,14 @@ private[sql] object PythonSQLUtils {
4343
def listSQLConfigs(): Array[(String, String, String, String)] = {
4444
val conf = new SQLConf()
4545
// Py4J doesn't seem to translate Seq well, so we convert to an Array.
46-
conf.getAllDefinedConfs.toArray
46+
conf.getAllDefinedConfs.filterNot(p => SQLConf.staticConfKeys.contains(p._1)).toArray
47+
}
48+
49+
def listStaticSQLConfigs(): Array[(String, String, String, String)] = {
50+
val conf = new SQLConf()
51+
// Force to build static SQL configurations
52+
StaticSQLConf
53+
conf.getAllDefinedConfs.filter(p => SQLConf.staticConfKeys.contains(p._1)).toArray
4754
}
4855

4956
/**
Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,64 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one or more
3+
* contributor license agreements. See the NOTICE file distributed with
4+
* this work for additional information regarding copyright ownership.
5+
* The ASF licenses this file to You under the Apache License, Version 2.0
6+
* (the "License"); you may not use this file except in compliance with
7+
* the License. You may obtain a copy of the License at
8+
*
9+
* http://www.apache.org/licenses/LICENSE-2.0
10+
*
11+
* Unless required by applicable law or agreed to in writing, software
12+
* distributed under the License is distributed on an "AS IS" BASIS,
13+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
* See the License for the specific language governing permissions and
15+
* limitations under the License.
16+
*/
17+
18+
package org.apache.spark.sql.api.python
19+
20+
import org.apache.spark.SparkFunSuite
21+
import org.apache.spark.sql.internal.{SQLConf, StaticSQLConf}
22+
23+
class PythonSQLUtilsSuite extends SparkFunSuite {
24+
25+
test("listing sql configurations contains runtime ones only") {
26+
val configs = PythonSQLUtils.listSQLConfigs()
27+
28+
// static sql configurations
29+
assert(!configs.exists(entry => entry._1 == StaticSQLConf.SPARK_SESSION_EXTENSIONS.key),
30+
"listSQLConfigs should not contain public static sql configuration")
31+
assert(!configs.exists(entry => entry._1 == StaticSQLConf.DEBUG_MODE.key),
32+
"listSQLConfigs should not contain internal static sql configuration")
33+
34+
// dynamic sql configurations
35+
assert(configs.exists(entry => entry._1 == SQLConf.DYNAMIC_PARTITION_PRUNING_ENABLED.key),
36+
"listSQLConfigs should contain public dynamic sql configuration")
37+
assert(!configs.exists(entry => entry._1 == SQLConf.ANALYZER_MAX_ITERATIONS.key),
38+
"listSQLConfigs should not contain internal dynamic sql configuration")
39+
40+
// spark core configurations
41+
assert(!configs.exists(entry => entry._1 == "spark.master"),
42+
"listSQLConfigs should not contain core configuration")
43+
}
44+
45+
test("listing static sql configurations contains public static ones only") {
46+
val configs = PythonSQLUtils.listStaticSQLConfigs()
47+
48+
// static sql configurations
49+
assert(configs.exists(entry => entry._1 == StaticSQLConf.SPARK_SESSION_EXTENSIONS.key),
50+
"listStaticSQLConfigs should contain public static sql configuration")
51+
assert(!configs.exists(entry => entry._1 == StaticSQLConf.DEBUG_MODE.key),
52+
"listStaticSQLConfigs should not contain internal static sql configuration")
53+
54+
// dynamic sql configurations
55+
assert(!configs.exists(entry => entry._1 == SQLConf.DYNAMIC_PARTITION_PRUNING_ENABLED.key),
56+
"listStaticSQLConfigs should not contain dynamic sql configuration")
57+
assert(!configs.exists(entry => entry._1 == SQLConf.ANALYZER_MAX_ITERATIONS.key),
58+
"listStaticSQLConfigs should not contain internal dynamic sql configuration")
59+
60+
// spark core configurations
61+
assert(!configs.exists(entry => entry._1 == "spark.master"),
62+
"listStaticSQLConfigs should not contain core configuration")
63+
}
64+
}

sql/create-docs.sh

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -45,8 +45,11 @@ mkdir docs
4545
echo "Generating SQL API Markdown files."
4646
"$SPARK_HOME/bin/spark-submit" gen-sql-api-docs.py
4747

48-
echo "Generating SQL configuration table HTML file."
49-
"$SPARK_HOME/bin/spark-submit" gen-sql-config-docs.py
48+
echo "Generating runtime SQL configuration table HTML file."
49+
"$SPARK_HOME/bin/spark-submit" gen-sql-config-docs.py runtime
50+
51+
echo "Generating static SQL configuration table HTML file."
52+
"$SPARK_HOME/bin/spark-submit" gen-sql-config-docs.py static
5053

5154
echo "Generating HTML files for SQL function table and examples."
5255
"$SPARK_HOME/bin/spark-submit" gen-sql-functions-docs.py

sql/gen-sql-config-docs.py

Lines changed: 16 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717

1818
import os
1919
import re
20+
import sys
2021
from collections import namedtuple
2122
from textwrap import dedent
2223

@@ -30,15 +31,19 @@
3031
"SQLConfEntry", ["name", "default", "description", "version"])
3132

3233

33-
def get_public_sql_configs(jvm):
34+
def get_public_sql_configs(jvm, group):
35+
if group == "static":
36+
config_set = jvm.org.apache.spark.sql.api.python.PythonSQLUtils.listStaticSQLConfigs()
37+
else:
38+
config_set = jvm.org.apache.spark.sql.api.python.PythonSQLUtils.listSQLConfigs()
3439
sql_configs = [
3540
SQLConfEntry(
3641
name=_sql_config._1(),
3742
default=_sql_config._2(),
3843
description=_sql_config._3(),
3944
version=_sql_config._4()
4045
)
41-
for _sql_config in jvm.org.apache.spark.sql.api.python.PythonSQLUtils.listSQLConfigs()
46+
for _sql_config in config_set
4247
]
4348
return sql_configs
4449

@@ -114,11 +119,17 @@ def generate_sql_configs_table_html(sql_configs, path):
114119

115120

116121
if __name__ == "__main__":
122+
if len(sys.argv) != 2:
123+
print("Usage: ./bin/spark-submit sql/gen-sql-config-docs.py <static|runtime>")
124+
sys.exit(-1)
125+
else:
126+
group = sys.argv[1]
127+
117128
jvm = launch_gateway().jvm
118-
sql_configs = get_public_sql_configs(jvm)
129+
sql_configs = get_public_sql_configs(jvm, group)
119130

120131
spark_root_dir = os.path.dirname(os.path.dirname(__file__))
121-
sql_configs_table_path = os.path.join(
122-
spark_root_dir, "docs/generated-sql-configuration-table.html")
132+
sql_configs_table_path = os.path\
133+
.join(spark_root_dir, "docs", "generated-" + group + "-sql-config-table.html")
123134

124135
generate_sql_configs_table_html(sql_configs, path=sql_configs_table_path)

0 commit comments

Comments
 (0)