Skip to content

Commit 90f6f39

Browse files
imback82cloud-fan
authored andcommitted
[SPARK-33366][SQL] Migrate LOAD DATA command to use UnresolvedTable to resolve the identifier
### What changes were proposed in this pull request? This PR proposes to migrate `LOAD DATA` to use `UnresolvedTable` to resolve the table identifier. This allows consistent resolution rules (temp view first, etc.) to be applied for both v1/v2 commands. More info about the consistent resolution rule proposal can be found in [JIRA](https://issues.apache.org/jira/browse/SPARK-29900) or [proposal doc](https://docs.google.com/document/d/1hvLjGA8y_W_hhilpngXVub1Ebv8RsMap986nENCFnrg/edit?usp=sharing). Note that `LOAD DATA` is not supported for v2 tables. ### Why are the changes needed? The changes allow consistent resolution behavior when resolving the table identifier. For example, the following is the current behavior: ```scala sql("CREATE TEMPORARY VIEW t AS SELECT 1") sql("CREATE DATABASE db") sql("CREATE TABLE t (key INT, value STRING) USING hive") sql("USE db") sql("LOAD DATA LOCAL INPATH 'examples/src/main/resources/kv1.txt' INTO TABLE t") // Succeeds ``` With this change, `LOAD DATA` above fails with the following: ``` org.apache.spark.sql.AnalysisException: t is a temp view not table.; line 1 pos 0 at org.apache.spark.sql.catalyst.analysis.package$AnalysisErrorAt.failAnalysis(package.scala:42) at org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveTempViews$$anonfun$apply$7.$anonfun$applyOrElse$39(Analyzer.scala:865) at scala.Option.foreach(Option.scala:407) ``` , which is expected since temporary view is resolved first and `LOAD DATA` doesn't support a temporary view. ### Does this PR introduce _any_ user-facing change? After this PR, `LOAD DATA ... t` is resolved to a temp view `t` instead of table `db.t` in the above scenario. ### How was this patch tested? Updated existing tests. Closes #30270 from imback82/load_data_cmd. Authored-by: Terry Kim <[email protected]> Signed-off-by: Wenchen Fan <[email protected]>
1 parent a1f84d8 commit 90f6f39

File tree

9 files changed

+82
-68
lines changed

9 files changed

+82
-68
lines changed

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3282,7 +3282,7 @@ class AstBuilder(conf: SQLConf) extends SqlBaseBaseVisitor[AnyRef] with Logging
32823282
}
32833283

32843284
/**
3285-
* Create a [[LoadDataStatement]].
3285+
* Create a [[LoadData]].
32863286
*
32873287
* For example:
32883288
* {{{
@@ -3291,8 +3291,8 @@ class AstBuilder(conf: SQLConf) extends SqlBaseBaseVisitor[AnyRef] with Logging
32913291
* }}}
32923292
*/
32933293
override def visitLoadData(ctx: LoadDataContext): LogicalPlan = withOrigin(ctx) {
3294-
LoadDataStatement(
3295-
tableName = visitMultipartIdentifier(ctx.multipartIdentifier),
3294+
LoadData(
3295+
child = UnresolvedTable(visitMultipartIdentifier(ctx.multipartIdentifier)),
32963296
path = string(ctx.path),
32973297
isLocal = ctx.LOCAL != null,
32983298
isOverwrite = ctx.OVERWRITE != null,

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statements.scala

Lines changed: 0 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -347,16 +347,6 @@ case class UseStatement(isNamespaceSet: Boolean, nameParts: Seq[String]) extends
347347
*/
348348
case class RepairTableStatement(tableName: Seq[String]) extends ParsedStatement
349349

350-
/**
351-
* A LOAD DATA INTO TABLE statement, as parsed from SQL
352-
*/
353-
case class LoadDataStatement(
354-
tableName: Seq[String],
355-
path: String,
356-
isLocal: Boolean,
357-
isOverwrite: Boolean,
358-
partition: Option[TablePartitionSpec]) extends ParsedStatement
359-
360350
/**
361351
* A SHOW CREATE TABLE statement, as parsed from SQL.
362352
*/

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala

Lines changed: 38 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -261,7 +261,7 @@ case class ReplaceTableAsSelect(
261261
}
262262

263263
/**
264-
* The logical plan of the CREATE NAMESPACE command that works for v2 catalogs.
264+
* The logical plan of the CREATE NAMESPACE command.
265265
*/
266266
case class CreateNamespace(
267267
catalog: SupportsNamespaces,
@@ -270,7 +270,7 @@ case class CreateNamespace(
270270
properties: Map[String, String]) extends Command
271271

272272
/**
273-
* The logical plan of the DROP NAMESPACE command that works for v2 catalogs.
273+
* The logical plan of the DROP NAMESPACE command.
274274
*/
275275
case class DropNamespace(
276276
namespace: LogicalPlan,
@@ -280,7 +280,7 @@ case class DropNamespace(
280280
}
281281

282282
/**
283-
* The logical plan of the DESCRIBE NAMESPACE command that works for v2 catalogs.
283+
* The logical plan of the DESCRIBE NAMESPACE command.
284284
*/
285285
case class DescribeNamespace(
286286
namespace: LogicalPlan,
@@ -296,7 +296,7 @@ case class DescribeNamespace(
296296

297297
/**
298298
* The logical plan of the ALTER (DATABASE|SCHEMA|NAMESPACE) ... SET (DBPROPERTIES|PROPERTIES)
299-
* command that works for v2 catalogs.
299+
* command.
300300
*/
301301
case class AlterNamespaceSetProperties(
302302
namespace: LogicalPlan,
@@ -305,8 +305,7 @@ case class AlterNamespaceSetProperties(
305305
}
306306

307307
/**
308-
* The logical plan of the ALTER (DATABASE|SCHEMA|NAMESPACE) ... SET LOCATION
309-
* command that works for v2 catalogs.
308+
* The logical plan of the ALTER (DATABASE|SCHEMA|NAMESPACE) ... SET LOCATION command.
310309
*/
311310
case class AlterNamespaceSetLocation(
312311
namespace: LogicalPlan,
@@ -315,7 +314,7 @@ case class AlterNamespaceSetLocation(
315314
}
316315

317316
/**
318-
* The logical plan of the SHOW NAMESPACES command that works for v2 catalogs.
317+
* The logical plan of the SHOW NAMESPACES command.
319318
*/
320319
case class ShowNamespaces(
321320
namespace: LogicalPlan,
@@ -327,7 +326,7 @@ case class ShowNamespaces(
327326
}
328327

329328
/**
330-
* The logical plan of the DESCRIBE relation_name command that works for v2 tables.
329+
* The logical plan of the DESCRIBE relation_name command.
331330
*/
332331
case class DescribeRelation(
333332
relation: LogicalPlan,
@@ -338,7 +337,7 @@ case class DescribeRelation(
338337
}
339338

340339
/**
341-
* The logical plan of the DESCRIBE relation_name col_name command that works for v2 tables.
340+
* The logical plan of the DESCRIBE relation_name col_name command.
342341
*/
343342
case class DescribeColumn(
344343
relation: LogicalPlan,
@@ -349,7 +348,7 @@ case class DescribeColumn(
349348
}
350349

351350
/**
352-
* The logical plan of the DELETE FROM command that works for v2 tables.
351+
* The logical plan of the DELETE FROM command.
353352
*/
354353
case class DeleteFromTable(
355354
table: LogicalPlan,
@@ -358,7 +357,7 @@ case class DeleteFromTable(
358357
}
359358

360359
/**
361-
* The logical plan of the UPDATE TABLE command that works for v2 tables.
360+
* The logical plan of the UPDATE TABLE command.
362361
*/
363362
case class UpdateTable(
364363
table: LogicalPlan,
@@ -368,7 +367,7 @@ case class UpdateTable(
368367
}
369368

370369
/**
371-
* The logical plan of the MERGE INTO command that works for v2 tables.
370+
* The logical plan of the MERGE INTO command.
372371
*/
373372
case class MergeIntoTable(
374373
targetTable: LogicalPlan,
@@ -407,7 +406,7 @@ case class Assignment(key: Expression, value: Expression) extends Expression wit
407406
}
408407

409408
/**
410-
* The logical plan of the DROP TABLE command that works for v2 tables.
409+
* The logical plan of the DROP TABLE command.
411410
*/
412411
case class DropTable(
413412
child: LogicalPlan,
@@ -422,7 +421,7 @@ case class DropTable(
422421
case class NoopDropTable(multipartIdentifier: Seq[String]) extends Command
423422

424423
/**
425-
* The logical plan of the ALTER TABLE command that works for v2 tables.
424+
* The logical plan of the ALTER TABLE command.
426425
*/
427426
case class AlterTable(
428427
catalog: TableCatalog,
@@ -454,15 +453,15 @@ case class AlterTable(
454453
}
455454

456455
/**
457-
* The logical plan of the ALTER TABLE RENAME command that works for v2 tables.
456+
* The logical plan of the ALTER TABLE RENAME command.
458457
*/
459458
case class RenameTable(
460459
catalog: TableCatalog,
461460
oldIdent: Identifier,
462461
newIdent: Identifier) extends Command
463462

464463
/**
465-
* The logical plan of the SHOW TABLE command that works for v2 catalogs.
464+
* The logical plan of the SHOW TABLE command.
466465
*/
467466
case class ShowTables(
468467
namespace: LogicalPlan,
@@ -475,7 +474,7 @@ case class ShowTables(
475474
}
476475

477476
/**
478-
* The logical plan of the SHOW VIEWS command that works for v1 and v2 catalogs.
477+
* The logical plan of the SHOW VIEWS command.
479478
*
480479
* Notes: v2 catalogs do not support views API yet, the command will fallback to
481480
* v1 ShowViewsCommand during ResolveSessionCatalog.
@@ -491,22 +490,22 @@ case class ShowViews(
491490
}
492491

493492
/**
494-
* The logical plan of the USE/USE NAMESPACE command that works for v2 catalogs.
493+
* The logical plan of the USE/USE NAMESPACE command.
495494
*/
496495
case class SetCatalogAndNamespace(
497496
catalogManager: CatalogManager,
498497
catalogName: Option[String],
499498
namespace: Option[Seq[String]]) extends Command
500499

501500
/**
502-
* The logical plan of the REFRESH TABLE command that works for v2 catalogs.
501+
* The logical plan of the REFRESH TABLE command.
503502
*/
504503
case class RefreshTable(child: LogicalPlan) extends Command {
505504
override def children: Seq[LogicalPlan] = child :: Nil
506505
}
507506

508507
/**
509-
* The logical plan of the SHOW CURRENT NAMESPACE command that works for v2 catalogs.
508+
* The logical plan of the SHOW CURRENT NAMESPACE command.
510509
*/
511510
case class ShowCurrentNamespace(catalogManager: CatalogManager) extends Command {
512511
override val output: Seq[Attribute] = Seq(
@@ -515,7 +514,7 @@ case class ShowCurrentNamespace(catalogManager: CatalogManager) extends Command
515514
}
516515

517516
/**
518-
* The logical plan of the SHOW TBLPROPERTIES command that works for v2 catalogs.
517+
* The logical plan of the SHOW TBLPROPERTIES command.
519518
*/
520519
case class ShowTableProperties(
521520
table: LogicalPlan,
@@ -556,21 +555,21 @@ case class CommentOnTable(child: LogicalPlan, comment: String) extends Command {
556555
}
557556

558557
/**
559-
* The logical plan of the REFRESH FUNCTION command that works for v2 catalogs.
558+
* The logical plan of the REFRESH FUNCTION command.
560559
*/
561560
case class RefreshFunction(child: LogicalPlan) extends Command {
562561
override def children: Seq[LogicalPlan] = child :: Nil
563562
}
564563

565564
/**
566-
* The logical plan of the DESCRIBE FUNCTION command that works for v2 catalogs.
565+
* The logical plan of the DESCRIBE FUNCTION command.
567566
*/
568567
case class DescribeFunction(child: LogicalPlan, isExtended: Boolean) extends Command {
569568
override def children: Seq[LogicalPlan] = child :: Nil
570569
}
571570

572571
/**
573-
* The logical plan of the DROP FUNCTION command that works for v2 catalogs.
572+
* The logical plan of the DROP FUNCTION command.
574573
*/
575574
case class DropFunction(
576575
child: LogicalPlan,
@@ -580,7 +579,7 @@ case class DropFunction(
580579
}
581580

582581
/**
583-
* The logical plan of the SHOW FUNCTIONS command that works for v2 catalogs.
582+
* The logical plan of the SHOW FUNCTIONS command.
584583
*/
585584
case class ShowFunctions(
586585
child: Option[LogicalPlan],
@@ -591,7 +590,7 @@ case class ShowFunctions(
591590
}
592591

593592
/**
594-
* The logical plan of the ANALYZE TABLE command that works for v2 catalogs.
593+
* The logical plan of the ANALYZE TABLE command.
595594
*/
596595
case class AnalyzeTable(
597596
child: LogicalPlan,
@@ -601,7 +600,7 @@ case class AnalyzeTable(
601600
}
602601

603602
/**
604-
* The logical plan of the ANALYZE TABLE FOR COLUMNS command that works for v2 catalogs.
603+
* The logical plan of the ANALYZE TABLE FOR COLUMNS command.
605604
*/
606605
case class AnalyzeColumn(
607606
child: LogicalPlan,
@@ -611,3 +610,15 @@ case class AnalyzeColumn(
611610
"mutually exclusive. Only one of them should be specified.")
612611
override def children: Seq[LogicalPlan] = child :: Nil
613612
}
613+
614+
/**
615+
* The logical plan of the LOAD DATA INTO TABLE command.
616+
*/
617+
case class LoadData(
618+
child: LogicalPlan,
619+
path: String,
620+
isLocal: Boolean,
621+
isOverwrite: Boolean,
622+
partition: Option[TablePartitionSpec]) extends Command {
623+
override def children: Seq[LogicalPlan] = child :: Nil
624+
}

sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1605,24 +1605,24 @@ class DDLParserSuite extends AnalysisTest {
16051605
test("LOAD DATA INTO table") {
16061606
comparePlans(
16071607
parsePlan("LOAD DATA INPATH 'filepath' INTO TABLE a.b.c"),
1608-
LoadDataStatement(Seq("a", "b", "c"), "filepath", false, false, None))
1608+
LoadData(UnresolvedTable(Seq("a", "b", "c")), "filepath", false, false, None))
16091609

16101610
comparePlans(
16111611
parsePlan("LOAD DATA LOCAL INPATH 'filepath' INTO TABLE a.b.c"),
1612-
LoadDataStatement(Seq("a", "b", "c"), "filepath", true, false, None))
1612+
LoadData(UnresolvedTable(Seq("a", "b", "c")), "filepath", true, false, None))
16131613

16141614
comparePlans(
16151615
parsePlan("LOAD DATA LOCAL INPATH 'filepath' OVERWRITE INTO TABLE a.b.c"),
1616-
LoadDataStatement(Seq("a", "b", "c"), "filepath", true, true, None))
1616+
LoadData(UnresolvedTable(Seq("a", "b", "c")), "filepath", true, true, None))
16171617

16181618
comparePlans(
16191619
parsePlan(
16201620
s"""
16211621
|LOAD DATA LOCAL INPATH 'filepath' OVERWRITE INTO TABLE a.b.c
16221622
|PARTITION(ds='2017-06-10')
16231623
""".stripMargin),
1624-
LoadDataStatement(
1625-
Seq("a", "b", "c"),
1624+
LoadData(
1625+
UnresolvedTable(Seq("a", "b", "c")),
16261626
"filepath",
16271627
true,
16281628
true,

sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala

Lines changed: 16 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -312,8 +312,8 @@ class ResolveSessionCatalog(
312312
ignoreIfExists = c.ifNotExists)
313313
}
314314

315-
case RefreshTable(r @ ResolvedTable(_, _, _: V1Table)) if isSessionCatalog(r.catalog) =>
316-
RefreshTableCommand(r.identifier.asTableIdentifier)
315+
case RefreshTable(ResolvedV1TableIdentifier(ident)) =>
316+
RefreshTableCommand(ident.asTableIdentifier)
317317

318318
case RefreshTable(r: ResolvedView) =>
319319
RefreshTableCommand(r.identifier.asTableIdentifier)
@@ -358,9 +358,8 @@ class ResolveSessionCatalog(
358358
orCreate = c.orCreate)
359359
}
360360

361-
case DropTable(
362-
r @ ResolvedTable(_, _, _: V1Table), ifExists, purge) if isSessionCatalog(r.catalog) =>
363-
DropTableCommand(r.identifier.asTableIdentifier, ifExists, isView = false, purge = purge)
361+
case DropTable(ResolvedV1TableIdentifier(ident), ifExists, purge) =>
362+
DropTableCommand(ident.asTableIdentifier, ifExists, isView = false, purge = purge)
364363

365364
// v1 DROP TABLE supports temp view.
366365
case DropTable(r: ResolvedView, ifExists, purge) =>
@@ -427,10 +426,9 @@ class ResolveSessionCatalog(
427426
v1TableName.asTableIdentifier,
428427
"MSCK REPAIR TABLE")
429428

430-
case LoadDataStatement(tbl, path, isLocal, isOverwrite, partition) =>
431-
val v1TableName = parseV1Table(tbl, "LOAD DATA")
429+
case LoadData(ResolvedV1TableIdentifier(ident), path, isLocal, isOverwrite, partition) =>
432430
LoadDataCommand(
433-
v1TableName.asTableIdentifier,
431+
ident.asTableIdentifier,
434432
path,
435433
isLocal,
436434
isOverwrite,
@@ -573,9 +571,8 @@ class ResolveSessionCatalog(
573571
"SHOW VIEWS, only SessionCatalog supports this command.")
574572
}
575573

576-
case ShowTableProperties(
577-
r @ ResolvedTable(_, _, _: V1Table), propertyKey) if isSessionCatalog(r.catalog) =>
578-
ShowTablePropertiesCommand(r.identifier.asTableIdentifier, propertyKey)
574+
case ShowTableProperties(ResolvedV1TableIdentifier(ident), propertyKey) =>
575+
ShowTablePropertiesCommand(ident.asTableIdentifier, propertyKey)
579576

580577
case ShowTableProperties(r: ResolvedView, propertyKey) =>
581578
ShowTablePropertiesCommand(r.identifier.asTableIdentifier, propertyKey)
@@ -696,9 +693,16 @@ class ResolveSessionCatalog(
696693
}
697694
}
698695

699-
object ResolvedV1TableOrViewIdentifier {
696+
object ResolvedV1TableIdentifier {
700697
def unapply(resolved: LogicalPlan): Option[Identifier] = resolved match {
701698
case ResolvedTable(catalog, ident, _: V1Table) if isSessionCatalog(catalog) => Some(ident)
699+
case _ => None
700+
}
701+
}
702+
703+
object ResolvedV1TableOrViewIdentifier {
704+
def unapply(resolved: LogicalPlan): Option[Identifier] = resolved match {
705+
case ResolvedV1TableIdentifier(ident) => Some(ident)
702706
case ResolvedView(ident, _) => Some(ident)
703707
case _ => None
704708
}

sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -283,6 +283,9 @@ class DataSourceV2Strategy(session: SparkSession) extends Strategy with Predicat
283283
case AnalyzeTable(_: ResolvedTable, _, _) | AnalyzeColumn(_: ResolvedTable, _, _) =>
284284
throw new AnalysisException("ANALYZE TABLE is not supported for v2 tables.")
285285

286+
case LoadData(_: ResolvedTable, _, _, _, _) =>
287+
throw new AnalysisException("LOAD DATA is not supported for v2 tables.")
288+
286289
case _ => Nil
287290
}
288291
}

0 commit comments

Comments
 (0)