From f2bb001b4e61d0dd8614ee05c8c22c82795d66ba Mon Sep 17 00:00:00 2001 From: Nicholas Chammas Date: Tue, 4 Feb 2020 16:28:39 -0500 Subject: [PATCH 01/29] include sql configs from html file --- docs/_includes/.gitignore | 1 + docs/configuration.md | 41 ++------------------------------------- 2 files changed, 3 insertions(+), 39 deletions(-) create mode 100644 docs/_includes/.gitignore diff --git a/docs/_includes/.gitignore b/docs/_includes/.gitignore new file mode 100644 index 000000000000..2260493b46ab --- /dev/null +++ b/docs/_includes/.gitignore @@ -0,0 +1 @@ +sql-configs.html diff --git a/docs/configuration.md b/docs/configuration.md index 2febfe9744d5..af18a6169d5d 100644 --- a/docs/configuration.md +++ b/docs/configuration.md @@ -2399,47 +2399,10 @@ the driver or executor, or, in the absence of that value, the number of cores av Please refer to the [Security](security.html) page for available options on how to secure different Spark subsystems. -### Spark SQL - -Running the SET -v command will show the entire list of the SQL configuration. - -
-
-{% highlight scala %} -// spark is an existing SparkSession -spark.sql("SET -v").show(numRows = 200, truncate = false) -{% endhighlight %} - -
- -
- -{% highlight java %} -// spark is an existing SparkSession -spark.sql("SET -v").show(200, false); -{% endhighlight %} -
- -
- -{% highlight python %} -# spark is an existing SparkSession -spark.sql("SET -v").show(n=200, truncate=False) -{% endhighlight %} - -
- -
- -{% highlight r %} -sparkR.session() -properties <- sql("SET -v") -showDF(properties, numRows = 200, truncate = FALSE) -{% endhighlight %} +### Spark SQL -
-
+{% include sql-configs.html %} ### Spark Streaming From f0137f692c9949a5bb16f7011cb8820c38514fdd Mon Sep 17 00:00:00 2001 From: Nicholas Chammas Date: Tue, 4 Feb 2020 16:29:34 -0500 Subject: [PATCH 02/29] generate html table of sql configs --- docs/README.md | 2 +- .../spark/sql/api/python/PythonSQLUtils.scala | 6 ++ sql/gen-sql-markdown.py | 82 ++++++++++++++++++- 3 files changed, 87 insertions(+), 3 deletions(-) diff --git a/docs/README.md b/docs/README.md index 22039871cf63..2262295c94a6 100644 --- a/docs/README.md +++ b/docs/README.md @@ -58,7 +58,7 @@ Note: Other versions of roxygen2 might work in SparkR documentation generation b To generate API docs for any language, you'll need to install these libraries: ```sh -$ sudo pip install sphinx mkdocs numpy +$ sudo pip install sphinx mkdocs numpy markdown ``` ## Generating the Documentation HTML diff --git a/sql/core/src/main/scala/org/apache/spark/sql/api/python/PythonSQLUtils.scala b/sql/core/src/main/scala/org/apache/spark/sql/api/python/PythonSQLUtils.scala index b232aa18c816..132cef4dc59e 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/api/python/PythonSQLUtils.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/api/python/PythonSQLUtils.scala @@ -29,6 +29,7 @@ import org.apache.spark.sql.catalyst.expressions.ExpressionInfo import org.apache.spark.sql.catalyst.parser.CatalystSqlParser import org.apache.spark.sql.execution.{ExplainMode, QueryExecution} import org.apache.spark.sql.execution.arrow.ArrowConverters +import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types.DataType private[sql] object PythonSQLUtils { @@ -39,6 +40,11 @@ private[sql] object PythonSQLUtils { FunctionRegistry.functionSet.flatMap(f => FunctionRegistry.builtin.lookupFunction(f)).toArray } + def listSQLConfigs(): Array[(String, String, String)] = { + val conf = new SQLConf() + conf.getAllDefinedConfs.toArray + } + /** * Python callable function to read a file in Arrow stream format and create a [[RDD]] * using each serialized ArrowRecordBatch as a partition. diff --git a/sql/gen-sql-markdown.py b/sql/gen-sql-markdown.py index e0529f831061..f95a26e9e52b 100644 --- a/sql/gen-sql-markdown.py +++ b/sql/gen-sql-markdown.py @@ -15,12 +15,18 @@ # limitations under the License. # -import sys import os +import re +import sys from collections import namedtuple +from textwrap import dedent + +from markdown import markdown ExpressionInfo = namedtuple( "ExpressionInfo", "className name usage arguments examples note since deprecated") +SQLConfEntry = namedtuple( + "SQLConfEntry", ["name", "default", "docstring"]) def _list_function_infos(jvm): @@ -47,6 +53,18 @@ def _list_function_infos(jvm): return sorted(infos, key=lambda i: i.name) +def _list_sql_configs(jvm): + sql_configs = { + _sql_config._1(): SQLConfEntry( + name=_sql_config._1(), + default=_sql_config._2(), + docstring=_sql_config._3(), + ) + for _sql_config in jvm.org.apache.spark.sql.api.python.PythonSQLUtils.listSQLConfigs() + } + return sql_configs + + def _make_pretty_usage(usage): """ Makes the usage description pretty and returns a formatted string if `usage` @@ -218,9 +236,69 @@ def generate_sql_markdown(jvm, path): mdfile.write("
\n\n") +def generate_sql_configs_table(jvm, path): + """ + Generates an HTML table at `path` that lists all public SQL + configuration options. + """ + sql_configs = _list_sql_configs(jvm) + value_reference_pattern = re.compile(r"^$") + # ConfigEntry(key=spark.buffer.size, defaultValue=65536, doc=, public=true) + config_entry_pattern = re.compile(r"ConfigEntry\(key=(\S*), defaultValue=\S*, doc=\S*, public=\S*\)") + + with open(path, 'w') as f: + f.write(dedent( + """ + + + """ + )) + for name, config in sorted(sql_configs.items()): + if config.default == "": + default = "none" + elif config.default.startswith("", config.docstring) + + f.write(dedent( + """ + + + + + + """ + .format( + name=name, + default=default, + docstring=markdown(docstring), + ) + )) + f.write("
Property NameDefaultMeaning
{name}{default}{docstring}
\n") + + if __name__ == "__main__": from pyspark.java_gateway import launch_gateway jvm = launch_gateway().jvm - markdown_file_path = "%s/docs/index.md" % os.path.dirname(sys.argv[0]) + spark_home = os.path.dirname(os.path.dirname(__file__)) + + markdown_file_path = os.path.join(spark_home, "sql/docs/index.md") + sql_configs_table_path = os.path.join(spark_home, "docs/_includes/sql-configs.html") + generate_sql_markdown(jvm, markdown_file_path) + generate_sql_configs_table(jvm, sql_configs_table_path) From a6045f7cef50b53fad3c0629c1800925bf515971 Mon Sep 17 00:00:00 2001 From: Nicholas Chammas Date: Tue, 4 Feb 2020 16:30:02 -0500 Subject: [PATCH 03/29] example of follow-up that is needed to clean docstrings --- .../src/main/scala/org/apache/spark/sql/internal/SQLConf.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala index 3ad3416256c7..06dc8aee0661 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala @@ -1152,7 +1152,7 @@ object SQLConf { val VARIABLE_SUBSTITUTE_ENABLED = buildConf("spark.sql.variable.substitute") - .doc("This enables substitution using syntax like ${var} ${system:var} and ${env:var}.") + .doc("This enables substitution using syntax like `${var}`, `${system:var}`, and `${env:var}`.") .booleanConf .createWithDefault(true) From 7742fc1b52acd3ff49c657a918bea0598c0be411 Mon Sep 17 00:00:00 2001 From: Nicholas Chammas Date: Tue, 4 Feb 2020 16:39:45 -0500 Subject: [PATCH 04/29] back to a plain list --- sql/gen-sql-markdown.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/sql/gen-sql-markdown.py b/sql/gen-sql-markdown.py index f95a26e9e52b..1c49c0d9cd77 100644 --- a/sql/gen-sql-markdown.py +++ b/sql/gen-sql-markdown.py @@ -54,14 +54,14 @@ def _list_function_infos(jvm): def _list_sql_configs(jvm): - sql_configs = { - _sql_config._1(): SQLConfEntry( + sql_configs = [ + SQLConfEntry( name=_sql_config._1(), default=_sql_config._2(), docstring=_sql_config._3(), ) for _sql_config in jvm.org.apache.spark.sql.api.python.PythonSQLUtils.listSQLConfigs() - } + ] return sql_configs @@ -253,7 +253,7 @@ def generate_sql_configs_table(jvm, path): Property NameDefaultMeaning """ )) - for name, config in sorted(sql_configs.items()): + for config in sorted(sql_configs, key=lambda x: x.name): if config.default == "": default = "none" elif config.default.startswith(" """ .format( - name=name, + name=config.name, default=default, docstring=markdown(docstring), ) From a3b6d199f27381f2220713c10df7bd8c769008dc Mon Sep 17 00:00:00 2001 From: Nicholas Chammas Date: Tue, 4 Feb 2020 16:51:42 -0500 Subject: [PATCH 05/29] style --- .../src/main/scala/org/apache/spark/sql/internal/SQLConf.scala | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala index 06dc8aee0661..c54cb41db5ff 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala @@ -1152,7 +1152,8 @@ object SQLConf { val VARIABLE_SUBSTITUTE_ENABLED = buildConf("spark.sql.variable.substitute") - .doc("This enables substitution using syntax like `${var}`, `${system:var}`, and `${env:var}`.") 
+ .doc("This enables substitution using syntax like `${var}`, `${system:var}`, " + + "and `${env:var}`.") .booleanConf .createWithDefault(true) From 886d01e6959548336f9183b1c617e0db1b6b94f5 Mon Sep 17 00:00:00 2001 From: Nicholas Chammas Date: Tue, 4 Feb 2020 16:58:55 -0500 Subject: [PATCH 06/29] python style --- sql/gen-sql-markdown.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/sql/gen-sql-markdown.py b/sql/gen-sql-markdown.py index 1c49c0d9cd77..e7c76b47ad54 100644 --- a/sql/gen-sql-markdown.py +++ b/sql/gen-sql-markdown.py @@ -244,7 +244,8 @@ def generate_sql_configs_table(jvm, path): sql_configs = _list_sql_configs(jvm) value_reference_pattern = re.compile(r"^$") # ConfigEntry(key=spark.buffer.size, defaultValue=65536, doc=, public=true) - config_entry_pattern = re.compile(r"ConfigEntry\(key=(\S*), defaultValue=\S*, doc=\S*, public=\S*\)") + config_entry_pattern = re.compile( + r"ConfigEntry\(key=(\S*), defaultValue=\S*, doc=\S*, public=\S*\)") with open(path, 'w') as f: f.write(dedent( @@ -258,7 +259,9 @@ def generate_sql_configs_table(jvm, path): default = "none" elif config.default.startswith(" Date: Tue, 4 Feb 2020 17:01:17 -0500 Subject: [PATCH 07/29] catch missed special default values --- sql/gen-sql-markdown.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/sql/gen-sql-markdown.py b/sql/gen-sql-markdown.py index e7c76b47ad54..0d53710cd1a0 100644 --- a/sql/gen-sql-markdown.py +++ b/sql/gen-sql-markdown.py @@ -263,17 +263,18 @@ def generate_sql_configs_table(jvm, path): # a) potential recursion # b) references to non-SQL configs default = "value of {}".format(referenced_config_name) - elif config.default.startswith("<"): + else: + default = config.default + + if default.startswith("<"): raise Exception( - "Unhandled reference in SQL config docs. Config {name} " + "Unhandled reference in SQL config docs. Config '{name}' " "has default '{default}' that looks like an HTML tag." .format( name=name, default=config.default, ) ) - else: - default = config.default docstring = config_entry_pattern.sub(r"\g<1>", config.docstring) From 914630f98b5733682700b86fa5e9c7434a4b9b0d Mon Sep 17 00:00:00 2001 From: Nicholas Chammas Date: Tue, 4 Feb 2020 17:07:07 -0500 Subject: [PATCH 08/29] fix reference --- sql/gen-sql-markdown.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/gen-sql-markdown.py b/sql/gen-sql-markdown.py index 0d53710cd1a0..003510582120 100644 --- a/sql/gen-sql-markdown.py +++ b/sql/gen-sql-markdown.py @@ -271,7 +271,7 @@ def generate_sql_configs_table(jvm, path): "Unhandled reference in SQL config docs. Config '{name}' " "has default '{default}' that looks like an HTML tag." .format( - name=name, + name=config.name, default=config.default, ) ) From a8ad179e6b5b57ae90dde7e81bcad9107a4a542f Mon Sep 17 00:00:00 2001 From: Nicholas Chammas Date: Tue, 4 Feb 2020 21:17:39 -0500 Subject: [PATCH 09/29] tweak create-docs info message --- sql/create-docs.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/create-docs.sh b/sql/create-docs.sh index 4353708d22f7..29fb0f4d14a3 100755 --- a/sql/create-docs.sh +++ b/sql/create-docs.sh @@ -42,7 +42,7 @@ pushd "$FWDIR" > /dev/null # Now create the markdown file rm -fr docs mkdir docs -echo "Generating markdown files for SQL documentation." 
+echo "Generating SQL documentation: markdown files + config table" "$SPARK_HOME/bin/spark-submit" gen-sql-markdown.py # Now create the HTML files From e09f89050d63c0faf670d601dac854fabd60eed5 Mon Sep 17 00:00:00 2001 From: Nicholas Chammas Date: Tue, 4 Feb 2020 21:19:21 -0500 Subject: [PATCH 10/29] use markdown via mkdocs --- docs/README.md | 2 +- sql/gen-sql-markdown.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/README.md b/docs/README.md index 2262295c94a6..22039871cf63 100644 --- a/docs/README.md +++ b/docs/README.md @@ -58,7 +58,7 @@ Note: Other versions of roxygen2 might work in SparkR documentation generation b To generate API docs for any language, you'll need to install these libraries: ```sh -$ sudo pip install sphinx mkdocs numpy markdown +$ sudo pip install sphinx mkdocs numpy ``` ## Generating the Documentation HTML diff --git a/sql/gen-sql-markdown.py b/sql/gen-sql-markdown.py index 003510582120..6a51a293b0b1 100644 --- a/sql/gen-sql-markdown.py +++ b/sql/gen-sql-markdown.py @@ -21,7 +21,7 @@ from collections import namedtuple from textwrap import dedent -from markdown import markdown +from mkdocs.structure.pages.markdown import markdown ExpressionInfo = namedtuple( "ExpressionInfo", "className name usage arguments examples note since deprecated") From 851d55749dddf617d1cc6c3021f29f2f4f5b4b3f Mon Sep 17 00:00:00 2001 From: Nicholas Chammas Date: Tue, 4 Feb 2020 21:22:05 -0500 Subject: [PATCH 11/29] explain why .toArray --- .../scala/org/apache/spark/sql/api/python/PythonSQLUtils.scala | 1 + 1 file changed, 1 insertion(+) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/api/python/PythonSQLUtils.scala b/sql/core/src/main/scala/org/apache/spark/sql/api/python/PythonSQLUtils.scala index 132cef4dc59e..bf3055d5e3e0 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/api/python/PythonSQLUtils.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/api/python/PythonSQLUtils.scala @@ -42,6 +42,7 @@ private[sql] object PythonSQLUtils { def listSQLConfigs(): Array[(String, String, String)] = { val conf = new SQLConf() + // Py4J doesn't seem to translate Seq well, so we convert to an Array. 
conf.getAllDefinedConfs.toArray } From 36fd916b8663f0e35f4f3e5d90fbd14edf018dd7 Mon Sep 17 00:00:00 2001 From: Nicholas Chammas Date: Tue, 4 Feb 2020 21:29:32 -0500 Subject: [PATCH 12/29] use markdown from within mkdocs --- sql/gen-sql-markdown.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sql/gen-sql-markdown.py b/sql/gen-sql-markdown.py index 6a51a293b0b1..cd623cb31930 100644 --- a/sql/gen-sql-markdown.py +++ b/sql/gen-sql-markdown.py @@ -21,7 +21,7 @@ from collections import namedtuple from textwrap import dedent -from mkdocs.structure.pages.markdown import markdown +from mkdocs.structure.pages import markdown ExpressionInfo = namedtuple( "ExpressionInfo", "className name usage arguments examples note since deprecated") @@ -289,7 +289,7 @@ def generate_sql_configs_table(jvm, path): .format( name=config.name, default=default, - docstring=markdown(docstring), + docstring=markdown.markdown(docstring), ) )) f.write("\n") From 6ca51cd4530ae91e307819efddf84aff442dc359 Mon Sep 17 00:00:00 2001 From: Nicholas Chammas Date: Tue, 4 Feb 2020 22:25:14 -0500 Subject: [PATCH 13/29] fix config reference + undo ConfigEntry removal --- .../scala/org/apache/spark/sql/internal/SQLConf.scala | 8 ++++---- sql/gen-sql-markdown.py | 7 +------ 2 files changed, 5 insertions(+), 10 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala index c54cb41db5ff..92fca8ac219f 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala @@ -1603,10 +1603,10 @@ object SQLConf { val PANDAS_UDF_BUFFER_SIZE = buildConf("spark.sql.pandas.udf.buffer.size") .doc( - s"Same as ${BUFFER_SIZE} but only applies to Pandas UDF executions. If it is not set, " + - s"the fallback is ${BUFFER_SIZE}. Note that Pandas execution requires more than 4 bytes. " + - "Lowering this value could make small Pandas UDF batch iterated and pipelined; however, " + - "it might degrade performance. See SPARK-27870.") + s"Same as `${BUFFER_SIZE.key}` but only applies to Pandas UDF executions. If it is not " + + s"set, the fallback is `${BUFFER_SIZE.key}`. Note that Pandas execution requires more " + + "than 4 bytes. Lowering this value could make small Pandas UDF batch iterated and " + + "pipelined; however, it might degrade performance. 
See SPARK-27870.") .fallbackConf(BUFFER_SIZE) val PANDAS_GROUPED_MAP_ASSIGN_COLUMNS_BY_NAME = diff --git a/sql/gen-sql-markdown.py b/sql/gen-sql-markdown.py index cd623cb31930..7670e9b9ebd3 100644 --- a/sql/gen-sql-markdown.py +++ b/sql/gen-sql-markdown.py @@ -243,9 +243,6 @@ def generate_sql_configs_table(jvm, path): """ sql_configs = _list_sql_configs(jvm) value_reference_pattern = re.compile(r"^$") - # ConfigEntry(key=spark.buffer.size, defaultValue=65536, doc=, public=true) - config_entry_pattern = re.compile( - r"ConfigEntry\(key=(\S*), defaultValue=\S*, doc=\S*, public=\S*\)") with open(path, 'w') as f: f.write(dedent( @@ -276,8 +273,6 @@ def generate_sql_configs_table(jvm, path): ) ) - docstring = config_entry_pattern.sub(r"\g<1>", config.docstring) - f.write(dedent( """ @@ -289,7 +284,7 @@ def generate_sql_configs_table(jvm, path): .format( name=config.name, default=default, - docstring=markdown.markdown(docstring), + docstring=markdown.markdown(config.docstring), ) )) f.write("\n") From 43e47bdd89bf9ff3b383f28d3d882ae855e6c453 Mon Sep 17 00:00:00 2001 From: Nicholas Chammas Date: Thu, 6 Feb 2020 14:48:57 -0500 Subject: [PATCH 14/29] explain why weird import --- sql/gen-sql-markdown.py | 1 + 1 file changed, 1 insertion(+) diff --git a/sql/gen-sql-markdown.py b/sql/gen-sql-markdown.py index 7670e9b9ebd3..720c80ed7a0d 100644 --- a/sql/gen-sql-markdown.py +++ b/sql/gen-sql-markdown.py @@ -21,6 +21,7 @@ from collections import namedtuple from textwrap import dedent +# To avoid adding a new direct dependency, we import markdown from within mkdocs. from mkdocs.structure.pages import markdown ExpressionInfo = namedtuple( From 1c9aa71c6168d815267d592eb23b8d54188a379c Mon Sep 17 00:00:00 2001 From: Nicholas Chammas Date: Thu, 6 Feb 2020 14:49:20 -0500 Subject: [PATCH 15/29] show example html output --- sql/gen-sql-markdown.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/sql/gen-sql-markdown.py b/sql/gen-sql-markdown.py index 720c80ed7a0d..8800b1faa74e 100644 --- a/sql/gen-sql-markdown.py +++ b/sql/gen-sql-markdown.py @@ -241,6 +241,23 @@ def generate_sql_configs_table(jvm, path): """ Generates an HTML table at `path` that lists all public SQL configuration options. + + The table will look something like this: + + ```html + + + + + + + + + + ... + +
+    <table class="table">
+    <tr><th>Property Name</th><th>Default</th><th>Meaning</th></tr>
+
+    <tr>
+        <td><code>spark.sql.adaptive.enabled</code></td>
+        <td>false</td>
+        <td><p>When true, enable adaptive query execution.</p></td>
+ ``` """ sql_configs = _list_sql_configs(jvm) value_reference_pattern = re.compile(r"^$") From 65c5bb043dd2eb6bf39a39928a84342df1d1a397 Mon Sep 17 00:00:00 2001 From: Nicholas Chammas Date: Thu, 6 Feb 2020 14:49:47 -0500 Subject: [PATCH 16/29] unnecessary comment --- sql/gen-sql-markdown.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/sql/gen-sql-markdown.py b/sql/gen-sql-markdown.py index 8800b1faa74e..9b3db4a8c56f 100644 --- a/sql/gen-sql-markdown.py +++ b/sql/gen-sql-markdown.py @@ -274,9 +274,6 @@ def generate_sql_configs_table(jvm, path): default = "none" elif config.default.startswith(" Date: Thu, 6 Feb 2020 14:50:01 -0500 Subject: [PATCH 17/29] docstring -> description --- sql/gen-sql-markdown.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/sql/gen-sql-markdown.py b/sql/gen-sql-markdown.py index 9b3db4a8c56f..15561fd6fb17 100644 --- a/sql/gen-sql-markdown.py +++ b/sql/gen-sql-markdown.py @@ -27,7 +27,7 @@ ExpressionInfo = namedtuple( "ExpressionInfo", "className name usage arguments examples note since deprecated") SQLConfEntry = namedtuple( - "SQLConfEntry", ["name", "default", "docstring"]) + "SQLConfEntry", ["name", "default", "description"]) def _list_function_infos(jvm): @@ -59,7 +59,7 @@ def _list_sql_configs(jvm): SQLConfEntry( name=_sql_config._1(), default=_sql_config._2(), - docstring=_sql_config._3(), + description=_sql_config._3(), ) for _sql_config in jvm.org.apache.spark.sql.api.python.PythonSQLUtils.listSQLConfigs() ] @@ -293,13 +293,13 @@ def generate_sql_configs_table(jvm, path): {name} {default} - {docstring} + {description} """ .format( name=config.name, default=default, - docstring=markdown.markdown(config.docstring), + description=markdown.markdown(config.description), ) )) f.write("\n") From aa63cbe660abffef7cd4fa3f0a67c8ac76060b4d Mon Sep 17 00:00:00 2001 From: Nicholas Chammas Date: Thu, 6 Feb 2020 15:36:49 -0500 Subject: [PATCH 18/29] split up sql docs script: API + Config --- sql/create-docs.sh | 12 +- ...en-sql-markdown.py => gen-sql-api-docs.py} | 95 +------------- sql/gen-sql-config-docs.py | 117 ++++++++++++++++++ 3 files changed, 126 insertions(+), 98 deletions(-) rename sql/{gen-sql-markdown.py => gen-sql-api-docs.py} (66%) create mode 100644 sql/gen-sql-config-docs.py diff --git a/sql/create-docs.sh b/sql/create-docs.sh index 29fb0f4d14a3..da8e991ee999 100755 --- a/sql/create-docs.sh +++ b/sql/create-docs.sh @@ -39,14 +39,16 @@ fi pushd "$FWDIR" > /dev/null -# Now create the markdown file rm -fr docs mkdir docs -echo "Generating SQL documentation: markdown files + config table" -"$SPARK_HOME/bin/spark-submit" gen-sql-markdown.py -# Now create the HTML files -echo "Generating HTML files for SQL documentation." +echo "Generating SQL API Markdown files." +"$SPARK_HOME/bin/spark-submit" gen-sql-api-docs.py + +echo "Generating SQL config HTML file." +"$SPARK_HOME/bin/spark-submit" gen-sql-config-docs.py + +echo "Generating HTML files for SQL API documentation." mkdocs build --clean rm -fr docs diff --git a/sql/gen-sql-markdown.py b/sql/gen-sql-api-docs.py similarity index 66% rename from sql/gen-sql-markdown.py rename to sql/gen-sql-api-docs.py index 15561fd6fb17..0b0a517174fd 100644 --- a/sql/gen-sql-markdown.py +++ b/sql/gen-sql-api-docs.py @@ -16,18 +16,10 @@ # import os -import re -import sys from collections import namedtuple -from textwrap import dedent - -# To avoid adding a new direct dependency, we import markdown from within mkdocs. 
-from mkdocs.structure.pages import markdown ExpressionInfo = namedtuple( "ExpressionInfo", "className name usage arguments examples note since deprecated") -SQLConfEntry = namedtuple( - "SQLConfEntry", ["name", "default", "description"]) def _list_function_infos(jvm): @@ -54,18 +46,6 @@ def _list_function_infos(jvm): return sorted(infos, key=lambda i: i.name) -def _list_sql_configs(jvm): - sql_configs = [ - SQLConfEntry( - name=_sql_config._1(), - default=_sql_config._2(), - description=_sql_config._3(), - ) - for _sql_config in jvm.org.apache.spark.sql.api.python.PythonSQLUtils.listSQLConfigs() - ] - return sql_configs - - def _make_pretty_usage(usage): """ Makes the usage description pretty and returns a formatted string if `usage` @@ -237,82 +217,11 @@ def generate_sql_markdown(jvm, path): mdfile.write("
<br/>\n\n")


-def generate_sql_configs_table(jvm, path):
-    """
-    Generates an HTML table at `path` that lists all public SQL
-    configuration options.
-
-    The table will look something like this:
-
-    ```html
-    <table class="table">
-    <tr><th>Property Name</th><th>Default</th><th>Meaning</th></tr>
-
-    <tr>
-        <td><code>spark.sql.adaptive.enabled</code></td>
-        <td>false</td>
-        <td><p>When true, enable adaptive query execution.</p></td>
- ``` - """ - sql_configs = _list_sql_configs(jvm) - value_reference_pattern = re.compile(r"^$") - - with open(path, 'w') as f: - f.write(dedent( - """ - - - """ - )) - for config in sorted(sql_configs, key=lambda x: x.name): - if config.default == "": - default = "none" - elif config.default.startswith(" - - - - - """ - .format( - name=config.name, - default=default, - description=markdown.markdown(config.description), - ) - )) - f.write("
Property NameDefaultMeaning
{name}{default}{description}
\n") - - if __name__ == "__main__": from pyspark.java_gateway import launch_gateway jvm = launch_gateway().jvm - spark_home = os.path.dirname(os.path.dirname(__file__)) - - markdown_file_path = os.path.join(spark_home, "sql/docs/index.md") - sql_configs_table_path = os.path.join(spark_home, "docs/_includes/sql-configs.html") + spark_root_dir = os.path.dirname(os.path.dirname(__file__)) + markdown_file_path = os.path.join(spark_root_dir, "sql/docs/index.md") generate_sql_markdown(jvm, markdown_file_path) - generate_sql_configs_table(jvm, sql_configs_table_path) diff --git a/sql/gen-sql-config-docs.py b/sql/gen-sql-config-docs.py new file mode 100644 index 000000000000..329ef8d2107d --- /dev/null +++ b/sql/gen-sql-config-docs.py @@ -0,0 +1,117 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import os +import re +from collections import namedtuple +from textwrap import dedent + +# To avoid adding a new direct dependency, we import markdown from within mkdocs. +from mkdocs.structure.pages import markdown + +SQLConfEntry = namedtuple( + "SQLConfEntry", ["name", "default", "description"]) + + +def _list_sql_configs(jvm): + sql_configs = [ + SQLConfEntry( + name=_sql_config._1(), + default=_sql_config._2(), + description=_sql_config._3(), + ) + for _sql_config in jvm.org.apache.spark.sql.api.python.PythonSQLUtils.listSQLConfigs() + ] + return sql_configs + + +def generate_sql_configs_table(jvm, path): + """ + Generates an HTML table at `path` that lists all public SQL + configuration options. + + The table will look something like this: + + ```html + + + + + + + + + + ... + +
+    <table class="table">
+    <tr><th>Property Name</th><th>Default</th><th>Meaning</th></tr>
+
+    <tr>
+        <td><code>spark.sql.adaptive.enabled</code></td>
+        <td>false</td>
+        <td><p>When true, enable adaptive query execution.</p></td>
+ ``` + """ + sql_configs = _list_sql_configs(jvm) + value_reference_pattern = re.compile(r"^$") + + with open(path, 'w') as f: + f.write(dedent( + """ + + + """ + )) + for config in sorted(sql_configs, key=lambda x: x.name): + if config.default == "": + default = "none" + elif config.default.startswith(" + + + + + """ + .format( + name=config.name, + default=default, + description=markdown.markdown(config.description), + ) + )) + f.write("
Property NameDefaultMeaning
{name}{default}{description}
\n") + + +if __name__ == "__main__": + from pyspark.java_gateway import launch_gateway + + jvm = launch_gateway().jvm + spark_root_dir = os.path.dirname(os.path.dirname(__file__)) + + sql_configs_table_path = os.path.join(spark_root_dir, "docs/sql-configs.html") + generate_sql_configs_table(jvm, sql_configs_table_path) From ba8ae9ffc42ab71d7c12a30076b50eff3ca2e554 Mon Sep 17 00:00:00 2001 From: Nicholas Chammas Date: Thu, 6 Feb 2020 15:37:37 -0500 Subject: [PATCH 19/29] move sql-configs.html to root of docs/ --- docs/{_includes => }/.gitignore | 0 docs/configuration.md | 9 ++++++++- 2 files changed, 8 insertions(+), 1 deletion(-) rename docs/{_includes => }/.gitignore (100%) diff --git a/docs/_includes/.gitignore b/docs/.gitignore similarity index 100% rename from docs/_includes/.gitignore rename to docs/.gitignore diff --git a/docs/configuration.md b/docs/configuration.md index af18a6169d5d..c091c29ab334 100644 --- a/docs/configuration.md +++ b/docs/configuration.md @@ -2400,9 +2400,16 @@ Please refer to the [Security](security.html) page for available options on how Spark subsystems. +{% for static_file in site.static_files %} + {% if static_file.name == 'sql-configs.html' %} ### Spark SQL -{% include sql-configs.html %} + {% include_relative sql-configs.html %} + {% break %} + {% endif %} +{% endfor %} + + ### Spark Streaming From 5310f084857ec16e1118eb4301f7106d562185c8 Mon Sep 17 00:00:00 2001 From: Nicholas Chammas Date: Thu, 6 Feb 2020 15:42:10 -0500 Subject: [PATCH 20/29] fix documentation around create-docs.sh --- sql/README.md | 2 +- sql/create-docs.sh | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/sql/README.md b/sql/README.md index 67e3225e2c27..ae5ebd1d7537 100644 --- a/sql/README.md +++ b/sql/README.md @@ -9,4 +9,4 @@ Spark SQL is broken up into four subprojects: - Hive Support (sql/hive) - Includes extensions that allow users to write queries using a subset of HiveQL and access data from a Hive Metastore using Hive SerDes. There are also wrappers that allow users to run queries that include Hive UDFs, UDAFs, and UDTFs. - HiveServer and CLI support (sql/hive-thriftserver) - Includes support for the SQL CLI (bin/spark-sql) and a HiveServer2 (for JDBC/ODBC) compatible server. -Running `./sql/create-docs.sh` generates SQL documentation for built-in functions under `sql/site`. +Running `./sql/create-docs.sh` generates SQL documentation for built-in functions under `sql/site`, and SQL configuration documentation that gets included as part of `configuration.md` in the main `docs` directory. diff --git a/sql/create-docs.sh b/sql/create-docs.sh index da8e991ee999..4c0f5de01bdb 100755 --- a/sql/create-docs.sh +++ b/sql/create-docs.sh @@ -17,7 +17,7 @@ # limitations under the License. # -# Script to create SQL API docs. This requires `mkdocs` and to build +# Script to create SQL API and config docs. This requires `mkdocs` and to build # Spark first. After running this script the html docs can be found in # $SPARK_HOME/sql/site From 61b4ac5310b0b9fad06ae63f31fc13c031e29776 Mon Sep 17 00:00:00 2001 From: Nicholas Chammas Date: Thu, 6 Feb 2020 15:42:18 -0500 Subject: [PATCH 21/29] unnecessary whitespace --- docs/configuration.md | 2 -- 1 file changed, 2 deletions(-) diff --git a/docs/configuration.md b/docs/configuration.md index c091c29ab334..6d7ac1f67edf 100644 --- a/docs/configuration.md +++ b/docs/configuration.md @@ -2410,8 +2410,6 @@ Spark subsystems. 
{% endfor %} - - ### Spark Streaming From 679bdacb436c63730cb20e01adcc0d6977fe4a33 Mon Sep 17 00:00:00 2001 From: Nicholas Chammas Date: Thu, 6 Feb 2020 16:17:45 -0500 Subject: [PATCH 22/29] group imports at beginning --- sql/gen-sql-api-docs.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/sql/gen-sql-api-docs.py b/sql/gen-sql-api-docs.py index 0b0a517174fd..4feee7ad5257 100644 --- a/sql/gen-sql-api-docs.py +++ b/sql/gen-sql-api-docs.py @@ -18,6 +18,8 @@ import os from collections import namedtuple +from pyspark.java_gateway import launch_gateway + ExpressionInfo = namedtuple( "ExpressionInfo", "className name usage arguments examples note since deprecated") @@ -218,10 +220,7 @@ def generate_sql_markdown(jvm, path): if __name__ == "__main__": - from pyspark.java_gateway import launch_gateway - jvm = launch_gateway().jvm spark_root_dir = os.path.dirname(os.path.dirname(__file__)) - markdown_file_path = os.path.join(spark_root_dir, "sql/docs/index.md") generate_sql_markdown(jvm, markdown_file_path) From 8401b6ae027a9871372244f70125540f2bd0f1f2 Mon Sep 17 00:00:00 2001 From: Nicholas Chammas Date: Thu, 6 Feb 2020 16:18:18 -0500 Subject: [PATCH 23/29] check for private configs --- sql/gen-sql-config-docs.py | 27 ++++++++++++++++++++------- 1 file changed, 20 insertions(+), 7 deletions(-) diff --git a/sql/gen-sql-config-docs.py b/sql/gen-sql-config-docs.py index 329ef8d2107d..e511ba2a726b 100644 --- a/sql/gen-sql-config-docs.py +++ b/sql/gen-sql-config-docs.py @@ -22,12 +22,19 @@ # To avoid adding a new direct dependency, we import markdown from within mkdocs. from mkdocs.structure.pages import markdown +from pyspark.java_gateway import launch_gateway SQLConfEntry = namedtuple( "SQLConfEntry", ["name", "default", "description"]) +SELECTION_OF_PRIVATE_CONFIGS = { + "spark.sql.limit.scaleUpFactor", + "spark.sql.streaming.schemaInference", + "spark.sql.view.maxNestedViewDepth", +} -def _list_sql_configs(jvm): + +def get_public_sql_configs(jvm): sql_configs = [ SQLConfEntry( name=_sql_config._1(), @@ -39,7 +46,7 @@ def _list_sql_configs(jvm): return sql_configs -def generate_sql_configs_table(jvm, path): +def generate_sql_configs_table(sql_configs, path): """ Generates an HTML table at `path` that lists all public SQL configuration options. @@ -61,7 +68,6 @@ def generate_sql_configs_table(jvm, path):
``` """ - sql_configs = _list_sql_configs(jvm) value_reference_pattern = re.compile(r"^$") with open(path, 'w') as f: @@ -108,10 +114,17 @@ def generate_sql_configs_table(jvm, path): if __name__ == "__main__": - from pyspark.java_gateway import launch_gateway - jvm = launch_gateway().jvm - spark_root_dir = os.path.dirname(os.path.dirname(__file__)) + sql_configs = get_public_sql_configs(jvm) + private_configs = {_.name for _ in sql_configs}.intersection(SELECTION_OF_PRIVATE_CONFIGS) + if private_configs: + raise Exception( + "get_public_sql_configs() returned the following private configs:", + ', '.join(private_configs) + ) + + spark_root_dir = os.path.dirname(os.path.dirname(__file__)) sql_configs_table_path = os.path.join(spark_root_dir, "docs/sql-configs.html") - generate_sql_configs_table(jvm, sql_configs_table_path) + + generate_sql_configs_table(sql_configs, path=sql_configs_table_path) From e48eb34ec4996ff994e4d4424027a7a397cab8b3 Mon Sep 17 00:00:00 2001 From: Nicholas Chammas Date: Fri, 7 Feb 2020 10:47:15 -0500 Subject: [PATCH 24/29] tweak default formatting --- sql/gen-sql-config-docs.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sql/gen-sql-config-docs.py b/sql/gen-sql-config-docs.py index e511ba2a726b..ad974227e518 100644 --- a/sql/gen-sql-config-docs.py +++ b/sql/gen-sql-config-docs.py @@ -79,10 +79,10 @@ def generate_sql_configs_table(sql_configs, path): )) for config in sorted(sql_configs, key=lambda x: x.name): if config.default == "": - default = "none" + default = "(none)" elif config.default.startswith(" Date: Fri, 7 Feb 2020 10:47:31 -0500 Subject: [PATCH 25/29] remove check for private configs --- sql/gen-sql-config-docs.py | 13 ------------- 1 file changed, 13 deletions(-) diff --git a/sql/gen-sql-config-docs.py b/sql/gen-sql-config-docs.py index ad974227e518..04f5a850c998 100644 --- a/sql/gen-sql-config-docs.py +++ b/sql/gen-sql-config-docs.py @@ -27,12 +27,6 @@ SQLConfEntry = namedtuple( "SQLConfEntry", ["name", "default", "description"]) -SELECTION_OF_PRIVATE_CONFIGS = { - "spark.sql.limit.scaleUpFactor", - "spark.sql.streaming.schemaInference", - "spark.sql.view.maxNestedViewDepth", -} - def get_public_sql_configs(jvm): sql_configs = [ @@ -117,13 +111,6 @@ def generate_sql_configs_table(sql_configs, path): jvm = launch_gateway().jvm sql_configs = get_public_sql_configs(jvm) - private_configs = {_.name for _ in sql_configs}.intersection(SELECTION_OF_PRIVATE_CONFIGS) - if private_configs: - raise Exception( - "get_public_sql_configs() returned the following private configs:", - ', '.join(private_configs) - ) - spark_root_dir = os.path.dirname(os.path.dirname(__file__)) sql_configs_table_path = os.path.join(spark_root_dir, "docs/sql-configs.html") From 617a69d7722d548655360d2c4b1b6fd350ef3f46 Mon Sep 17 00:00:00 2001 From: Nicholas Chammas Date: Fri, 7 Feb 2020 10:47:49 -0500 Subject: [PATCH 26/29] tweak info message from create-docs.sh --- sql/create-docs.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/create-docs.sh b/sql/create-docs.sh index 4c0f5de01bdb..44aa877332fd 100755 --- a/sql/create-docs.sh +++ b/sql/create-docs.sh @@ -45,7 +45,7 @@ mkdir docs echo "Generating SQL API Markdown files." "$SPARK_HOME/bin/spark-submit" gen-sql-api-docs.py -echo "Generating SQL config HTML file." +echo "Generating SQL configuration table HTML file." "$SPARK_HOME/bin/spark-submit" gen-sql-config-docs.py echo "Generating HTML files for SQL API documentation." 
From 452bf980c99f16cd08e8d4062bdb067c4ff2fe57 Mon Sep 17 00:00:00 2001 From: Nicholas Chammas Date: Fri, 7 Feb 2020 12:05:42 -0500 Subject: [PATCH 27/29] remove dup default --- .../main/scala/org/apache/spark/sql/internal/SQLConf.scala | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala index 92fca8ac219f..e9c93d9ce8b6 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala @@ -821,7 +821,7 @@ object SQLConf { .createWithDefault(true) val BUCKETING_MAX_BUCKETS = buildConf("spark.sql.sources.bucketing.maxBuckets") - .doc("The maximum number of buckets allowed. Defaults to 100000") + .doc("The maximum number of buckets allowed.") .intConf .checkValue(_ > 0, "the value of spark.sql.sources.bucketing.maxBuckets must be greater than 0") .createWithDefault(100000) @@ -1163,7 +1163,7 @@ object SQLConf { .doc("Enable two-level aggregate hash map. When enabled, records will first be " + "inserted/looked-up at a 1st-level, small, fast map, and then fallback to a " + "2nd-level, larger, slower map when 1st level is full or keys cannot be found. " + - "When disabled, records go directly to the 2nd level. Defaults to true.") + "When disabled, records go directly to the 2nd level.") .booleanConf .createWithDefault(true) From 4c32cf226694d87560b14b3c0f5a139daa956bb3 Mon Sep 17 00:00:00 2001 From: Nicholas Chammas Date: Fri, 7 Feb 2020 12:06:06 -0500 Subject: [PATCH 28/29] stringify certain defaults so they display right --- .../apache/spark/sql/internal/SQLConf.scala | 20 +++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala index e9c93d9ce8b6..55f8080e8738 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala @@ -324,11 +324,11 @@ object SQLConf { .doc("Configures the maximum size in bytes for a table that will be broadcast to all worker " + "nodes when performing a join. By setting this value to -1 broadcasting can be disabled. 
" + "Note that currently statistics are only supported for Hive Metastore tables where the " + - "command ANALYZE TABLE <tableName> COMPUTE STATISTICS noscan has been " + + "command `ANALYZE TABLE COMPUTE STATISTICS noscan` has been " + "run, and file-based data source tables where the statistics are computed directly on " + "the files of data.") .bytesConf(ByteUnit.BYTE) - .createWithDefault(10L * 1024 * 1024) + .createWithDefaultString("10MB") val LIMIT_SCALE_UP_FACTOR = buildConf("spark.sql.limit.scaleUpFactor") .internal() @@ -393,7 +393,7 @@ object SQLConf { s"an effect when '${ADAPTIVE_EXECUTION_ENABLED.key}' and " + s"'${REDUCE_POST_SHUFFLE_PARTITIONS_ENABLED.key}' is enabled.") .bytesConf(ByteUnit.BYTE) - .createWithDefault(64 * 1024 * 1024) + .createWithDefaultString("64MB") val SHUFFLE_MAX_NUM_POSTSHUFFLE_PARTITIONS = buildConf("spark.sql.adaptive.shuffle.maxNumPostShufflePartitions") @@ -427,7 +427,7 @@ object SQLConf { .doc("Configures the minimum size in bytes for a partition that is considered as a skewed " + "partition in adaptive skewed join.") .bytesConf(ByteUnit.BYTE) - .createWithDefault(64 * 1024 * 1024) + .createWithDefaultString("64MB") val ADAPTIVE_EXECUTION_SKEWED_PARTITION_FACTOR = buildConf("spark.sql.adaptive.optimizeSkewedJoin.skewedPartitionFactor") @@ -761,7 +761,7 @@ object SQLConf { val BROADCAST_TIMEOUT = buildConf("spark.sql.broadcastTimeout") .doc("Timeout in seconds for the broadcast wait time in broadcast joins.") .timeConf(TimeUnit.SECONDS) - .createWithDefault(5 * 60) + .createWithDefaultString("300") // This is only used for the thriftserver val THRIFTSERVER_POOL = buildConf("spark.sql.thriftserver.scheduler.pool") @@ -1013,7 +1013,7 @@ object SQLConf { "This configuration is effective only when using file-based sources such as Parquet, JSON " + "and ORC.") .bytesConf(ByteUnit.BYTE) - .createWithDefault(128 * 1024 * 1024) // parquet.block.size + .createWithDefaultString("128MB") // parquet.block.size val FILES_OPEN_COST_IN_BYTES = buildConf("spark.sql.files.openCostInBytes") .internal() @@ -1317,10 +1317,10 @@ object SQLConf { val STREAMING_STOP_TIMEOUT = buildConf("spark.sql.streaming.stopTimeout") - .doc("How long to wait for the streaming execution thread to stop when calling the " + - "streaming query's stop() method in milliseconds. 0 or negative values wait indefinitely.") + .doc("How long to wait in milliseconds for the streaming execution thread to stop when " + + "calling the streaming query's stop() method. 0 or negative values wait indefinitely.") .timeConf(TimeUnit.MILLISECONDS) - .createWithDefault(0L) + .createWithDefaultString("0") val STREAMING_NO_DATA_PROGRESS_EVENT_INTERVAL = buildConf("spark.sql.streaming.noDataProgressEventInterval") @@ -2021,7 +2021,7 @@ object SQLConf { .checkValue(i => i >= 0 && i <= ByteArrayMethods.MAX_ROUNDED_ARRAY_LENGTH, "Invalid " + "value for 'spark.sql.maxPlanStringLength'. 
Length must be a valid string length " + "(nonnegative and shorter than the maximum size).") - .createWithDefault(ByteArrayMethods.MAX_ROUNDED_ARRAY_LENGTH) + .createWithDefaultString(s"${ByteArrayMethods.MAX_ROUNDED_ARRAY_LENGTH}") val SET_COMMAND_REJECTS_SPARK_CORE_CONFS = buildConf("spark.sql.legacy.setCommandRejectsSparkCoreConfs") From b08bac43a844376bc870cb99c776d50157c39a01 Mon Sep 17 00:00:00 2001 From: Nicholas Chammas Date: Fri, 7 Feb 2020 18:13:01 -0500 Subject: [PATCH 29/29] 300 -> 5 * 60 --- .../src/main/scala/org/apache/spark/sql/internal/SQLConf.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala index 55f8080e8738..3362af267ca2 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala @@ -761,7 +761,7 @@ object SQLConf { val BROADCAST_TIMEOUT = buildConf("spark.sql.broadcastTimeout") .doc("Timeout in seconds for the broadcast wait time in broadcast joins.") .timeConf(TimeUnit.SECONDS) - .createWithDefaultString("300") + .createWithDefaultString(s"${5 * 60}") // This is only used for the thriftserver val THRIFTSERVER_POOL = buildConf("spark.sql.thriftserver.scheduler.pool")
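
For anyone trying this series locally, here is a minimal sketch — not part of any commit above — of how the `listSQLConfigs()` hook added in PATCH 02 is consumed from Python, mirroring what `gen-sql-config-docs.py` does. It assumes a local Spark build with `SPARK_HOME` set so that `launch_gateway()` can find the Spark classes.

```python
# Minimal sketch (assumes SPARK_HOME points at a local Spark build).
from pyspark.java_gateway import launch_gateway

jvm = launch_gateway().jvm

# listSQLConfigs() returns an Array[(String, String, String)]; Py4J exposes
# each Scala tuple's fields as _1(), _2(), _3().
for entry in jvm.org.apache.spark.sql.api.python.PythonSQLUtils.listSQLConfigs():
    name, default, description = entry._1(), entry._2(), entry._3()
    print(name, default, description)
```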