
Commit 8cd6a96

jiangxb1987 authored and cloud-fan committed
[SPARK-23525][SQL] Support ALTER TABLE CHANGE COLUMN COMMENT for external hive table
## What changes were proposed in this pull request?

The following query doesn't work as expected:

```
CREATE EXTERNAL TABLE ext_table(a STRING, b INT, c STRING) PARTITIONED BY (d STRING) LOCATION 'sql/core/spark-warehouse/ext_table';
ALTER TABLE ext_table CHANGE a a STRING COMMENT "new comment";
DESC ext_table;
```

The comment of column `a` is not updated; that's because `HiveExternalCatalog.doAlterTable` ignores table schema changes. To fix the issue, we should call `doAlterTableDataSchema` instead of `doAlterTable`.

## How was this patch tested?

Updated `DDLSuite.testChangeColumn`.

Author: Xingbo Jiang <[email protected]>

Closes #20696 from jiangxb1987/alterColumnComment.

(cherry picked from commit ac76eff)

Signed-off-by: Wenchen Fan <[email protected]>
1 parent 66c1978 commit 8cd6a96
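
For context, a minimal sketch of the reproduction described in the commit message, assuming a Hive-enabled `SparkSession` and a writable warehouse path; the object and app names here are illustrative, not part of the patch:

```scala
import org.apache.spark.sql.SparkSession

// Minimal reproduction sketch for SPARK-23525; assumes Hive support is available.
object ChangeColumnCommentRepro {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder()
      .appName("SPARK-23525 repro")
      .enableHiveSupport()
      .getOrCreate()

    spark.sql(
      """CREATE EXTERNAL TABLE ext_table(a STRING, b INT, c STRING)
        |PARTITIONED BY (d STRING)
        |LOCATION 'sql/core/spark-warehouse/ext_table'""".stripMargin)

    // Before this patch the new comment was silently dropped for external Hive
    // tables, because the change-column command went through doAlterTable.
    spark.sql("ALTER TABLE ext_table CHANGE a a STRING COMMENT 'new comment'")

    // With the patch, DESC shows 'new comment' for column a.
    spark.sql("DESC ext_table").show(truncate = false)

    spark.stop()
  }
}
```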

File tree

4 files changed: +22 −11 lines changed


sql/core/src/main/scala/org/apache/spark/sql/execution/command/ddl.scala

Lines changed: 6 additions & 6 deletions

@@ -314,8 +314,8 @@ case class AlterTableChangeColumnCommand(
     val resolver = sparkSession.sessionState.conf.resolver
     DDLUtils.verifyAlterTableType(catalog, table, isView = false)
 
-    // Find the origin column from schema by column name.
-    val originColumn = findColumnByName(table.schema, columnName, resolver)
+    // Find the origin column from dataSchema by column name.
+    val originColumn = findColumnByName(table.dataSchema, columnName, resolver)
     // Throw an AnalysisException if the column name/dataType is changed.
     if (!columnEqual(originColumn, newColumn, resolver)) {
       throw new AnalysisException(
@@ -324,16 +324,15 @@ case class AlterTableChangeColumnCommand(
         s"'${newColumn.name}' with type '${newColumn.dataType}'")
     }
 
-    val newSchema = table.schema.fields.map { field =>
+    val newDataSchema = table.dataSchema.fields.map { field =>
       if (field.name == originColumn.name) {
         // Create a new column from the origin column with the new comment.
         addComment(field, newColumn.getComment)
       } else {
         field
       }
     }
-    val newTable = table.copy(schema = StructType(newSchema))
-    catalog.alterTable(newTable)
+    catalog.alterTableDataSchema(tableName, StructType(newDataSchema))
 
     Seq.empty[Row]
   }
@@ -345,7 +344,8 @@ case class AlterTableChangeColumnCommand(
     schema.fields.collectFirst {
       case field if resolver(field.name, name) => field
     }.getOrElse(throw new AnalysisException(
-      s"Invalid column reference '$name', table schema is '${schema}'"))
+      s"Can't find column `$name` given table data columns " +
+        s"${schema.fieldNames.mkString("[`", "`, `", "`]")}"))
   }
 
   // Add the comment to a column, if comment is empty, return the original column.
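
Because the lookup above now runs against `table.dataSchema` (the data columns only, excluding partition columns), a partition column can no longer be matched and the command reports the new, shorter error. Below is a standalone sketch of that lookup and message, assuming a case-insensitive resolver and a plain exception in place of the command's `AnalysisException`:

```scala
import org.apache.spark.sql.types.{IntegerType, StringType, StructField, StructType}

object FindColumnSketch {
  // Mirrors the patched findColumnByName: search only the given (data) schema and,
  // when the name is missing, list the available columns back-quoted.
  def findColumnByName(schema: StructType, name: String): StructField =
    schema.fields
      .find(_.name.equalsIgnoreCase(name)) // stand-in for the session's resolver
      .getOrElse(throw new IllegalArgumentException(
        s"Can't find column `$name` given table data columns " +
          s"${schema.fieldNames.mkString("[`", "`, `", "`]")}"))

  def main(args: Array[String]): Unit = {
    // Data columns of partition_table from the test input; the partition
    // columns c and d are not part of the data schema.
    val dataSchema = StructType(Seq(
      StructField("a", IntegerType),
      StructField("b", StringType)))

    println(findColumnByName(dataSchema, "a").name) // prints: a
    // Throws: Can't find column `c` given table data columns [`a`, `b`]
    findColumnByName(dataSchema, "c")
  }
}
```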

sql/core/src/test/resources/sql-tests/inputs/change-column.sql

Lines changed: 1 addition & 0 deletions

@@ -49,6 +49,7 @@ ALTER TABLE global_temp.global_temp_view CHANGE a a INT COMMENT 'this is column
 -- Change column in partition spec (not supported yet)
 CREATE TABLE partition_table(a INT, b STRING, c INT, d STRING) USING parquet PARTITIONED BY (c, d);
 ALTER TABLE partition_table PARTITION (c = 1) CHANGE COLUMN a new_a INT;
+ALTER TABLE partition_table CHANGE COLUMN c c INT COMMENT 'this is column C';
 
 -- DROP TEST TABLE
 DROP TABLE test_change;

sql/core/src/test/resources/sql-tests/results/change-column.sql.out

Lines changed: 14 additions & 5 deletions

@@ -1,5 +1,5 @@
 -- Automatically generated by SQLQueryTestSuite
--- Number of queries: 32
+-- Number of queries: 33
 
 
 -- !query 0
@@ -154,7 +154,7 @@ ALTER TABLE test_change CHANGE invalid_col invalid_col INT
 struct<>
 -- !query 15 output
 org.apache.spark.sql.AnalysisException
-Invalid column reference 'invalid_col', table schema is 'StructType(StructField(a,IntegerType,true), StructField(b,StringType,true), StructField(c,IntegerType,true))';
+Can't find column `invalid_col` given table data columns [`a`, `b`, `c`];
 
 
 -- !query 16
@@ -291,16 +291,25 @@ ALTER TABLE partition_table PARTITION (c = 1) CHANGE COLUMN a new_a INT
 
 
 -- !query 30
-DROP TABLE test_change
+ALTER TABLE partition_table CHANGE COLUMN c c INT COMMENT 'this is column C'
 -- !query 30 schema
 struct<>
 -- !query 30 output
-
+org.apache.spark.sql.AnalysisException
+Can't find column `c` given table data columns [`a`, `b`];
 
 
 -- !query 31
-DROP TABLE partition_table
+DROP TABLE test_change
 -- !query 31 schema
 struct<>
 -- !query 31 output
 
+
+
+-- !query 32
+DROP TABLE partition_table
+-- !query 32 schema
+struct<>
+-- !query 32 output
+
sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala

Lines changed: 1 addition & 0 deletions

@@ -1568,6 +1568,7 @@ abstract class DDLSuite extends QueryTest with SQLTestUtils {
     // Ensure that change column will preserve other metadata fields.
     sql("ALTER TABLE dbx.tab1 CHANGE COLUMN col1 col1 INT COMMENT 'this is col1'")
     assert(getMetadata("col1").getString("key") == "value")
+    assert(getMetadata("col1").getString("comment") == "this is col1")
   }
 
   test("drop build-in function") {
