Skip to content

Commit d9e30c5

Browse files
cloud-fan authored and yhuai committed
[SPARK-10656][SQL] completely support special chars in DataFrame
The main problem is that we interpret column names with special handling of `.` for DataFrame. This enables us to write something like `df("a.b")` to get the field `b` of `a`. However, we don't need this feature in `DataFrame.apply("*")` or `DataFrame.withColumnRenamed`. In these 2 cases, the column name is already the final name, so we don't need any extra processing to interpret it. The solution is simple: use `queryExecution.analyzed.output` to get the resolved columns directly, instead of using `DataFrame.resolve`. close #8811 Author: Wenchen Fan <[email protected]> Closes #9462 from cloud-fan/special-chars.
1 parent b9455d1 commit d9e30c5

File tree

2 files changed

+16
-6
lines changed

2 files changed

+16
-6
lines changed

sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala

Lines changed: 10 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -698,7 +698,7 @@ class DataFrame private[sql](
698698
*/
699699
def col(colName: String): Column = colName match {
700700
case "*" =>
701-
Column(ResolvedStar(schema.fieldNames.map(resolve)))
701+
Column(ResolvedStar(queryExecution.analyzed.output))
702702
case _ =>
703703
val expr = resolve(colName)
704704
Column(expr)
@@ -1259,13 +1259,17 @@ class DataFrame private[sql](
12591259
*/
12601260
def withColumnRenamed(existingName: String, newName: String): DataFrame = {
12611261
val resolver = sqlContext.analyzer.resolver
1262-
val shouldRename = schema.exists(f => resolver(f.name, existingName))
1262+
val output = queryExecution.analyzed.output
1263+
val shouldRename = output.exists(f => resolver(f.name, existingName))
12631264
if (shouldRename) {
1264-
val colNames = schema.map { field =>
1265-
val name = field.name
1266-
if (resolver(name, existingName)) Column(name).as(newName) else Column(name)
1265+
val columns = output.map { col =>
1266+
if (resolver(col.name, existingName)) {
1267+
Column(col).as(newName)
1268+
} else {
1269+
Column(col)
1270+
}
12671271
}
1268-
select(colNames : _*)
1272+
select(columns : _*)
12691273
} else {
12701274
this
12711275
}

sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala

Lines changed: 6 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1128,4 +1128,10 @@ class DataFrameSuite extends QueryTest with SharedSQLContext {
11281128
}
11291129
}
11301130
}
1131+
1132+
test("SPARK-10656: completely support special chars") {
1133+
val df = Seq(1 -> "a").toDF("i_$.a", "d^'a.")
1134+
checkAnswer(df.select(df("*")), Row(1, "a"))
1135+
checkAnswer(df.withColumnRenamed("d^'a.", "a"), Row(1, "a"))
1136+
}
11311137
}

0 commit comments

Comments
 (0)