diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/NestedColumnAliasing.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/NestedColumnAliasing.scala
index 8de2663a98094..cd4070b391234 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/NestedColumnAliasing.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/NestedColumnAliasing.scala
@@ -185,9 +185,29 @@ object NestedColumnAliasing {
       plan: LogicalPlan,
       nestedFieldToAlias: Map[Expression, Alias],
       attrToAliases: AttributeMap[Seq[Alias]]): LogicalPlan = {
-    plan.withNewChildren(plan.children.map { plan =>
-      Project(plan.output.flatMap(a => attrToAliases.getOrElse(a, Seq(a))), plan)
-    }).transformExpressions {
+    // `withNewChildren` is dangerous for Generate: inserting a Project below it
+    // changes the child's output ordinals, so `unrequiredChildIndex` must be
+    // remapped to positions in the new Project's output.
+    val withNewChildren = plan match {
+      case g: Generate =>
+        val unrequiredSet = g.unrequiredChildIndex.toSet
+        // Expand each child attribute into its aliases (or itself when it has
+        // none), tagging every produced expression with whether its source
+        // attribute was unrequired by the Generate.
+        val newProjectList = g.child.output.zipWithIndex.flatMap { case (attr, idx) =>
+          attrToAliases.getOrElse(attr, Seq(attr)).map(e => (e, unrequiredSet.contains(idx)))
+        }
+        val newUnrequiredChildIndex = newProjectList.zipWithIndex.collect {
+          case ((_, unrequired), idx) if unrequired => idx
+        }
+        g.copy(
+          child = Project(newProjectList.map(_._1), g.child),
+          unrequiredChildIndex = newUnrequiredChildIndex)
+      case _ =>
+        plan.withNewChildren(plan.children.map { child =>
+          Project(child.output.flatMap(a => attrToAliases.getOrElse(a, Seq(a))), child)
+        })
+    }
+    withNewChildren.transformExpressions {
       case f: ExtractValue if nestedFieldToAlias.contains(f.canonicalized) =>
         nestedFieldToAlias(f.canonicalized).toAttribute
     }