|  | 
| 1 |  | -/* | 
| 2 |  | - * Licensed to the Apache Software Foundation (ASF) under one or more | 
| 3 |  | - * contributor license agreements.  See the NOTICE file distributed with | 
| 4 |  | - * this work for additional information regarding copyright ownership. | 
| 5 |  | - * The ASF licenses this file to You under the Apache License, Version 2.0 | 
| 6 |  | - * (the "License"); you may not use this file except in compliance with | 
| 7 |  | - * the License.  You may obtain a copy of the License at | 
| 8 |  | - * | 
| 9 |  | - *    http://www.apache.org/licenses/LICENSE-2.0 | 
| 10 |  | - * | 
| 11 |  | - * Unless required by applicable law or agreed to in writing, software | 
| 12 |  | - * distributed under the License is distributed on an "AS IS" BASIS, | 
| 13 |  | - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | 
| 14 |  | - * See the License for the specific language governing permissions and | 
| 15 |  | - * limitations under the License. | 
| 16 |  | - */ | 
| 17 |  | - | 
| 18 |  | -package org.apache.spark.sql.execution.joins | 
| 19 |  | - | 
| 20 |  | -import org.apache.spark.sql.catalyst.planning.ExtractEquiJoinKeys | 
| 21 |  | -import org.apache.spark.sql.catalyst.plans.logical.Join | 
| 22 |  | -import org.apache.spark.sql.types.{IntegerType, DoubleType, StructType} | 
| 23 |  | -import org.apache.spark.sql.{DataFrame, Row} | 
| 24 |  | -import org.apache.spark.sql.catalyst.expressions._ | 
| 25 |  | -import org.apache.spark.sql.catalyst.plans._ | 
| 26 |  | -import org.apache.spark.sql.execution.{EnsureRequirements, joins, SparkPlan, SparkPlanTest} | 
| 27 |  | - | 
/**
 * Checks the physical outer-join operators used in this file (`ShuffledHashOuterJoin`,
 * `BroadcastHashOuterJoin` and `BroadcastNestedLoopJoin`) against small hand-written inputs.
 *
 * Each call to `testOuterJoin` registers one test per applicable operator, so every scenario
 * listed below is verified against all of the implementations that are exercised for it.
 */
class OuterJoinSuite extends SparkPlanTest {

  /**
   * Registers one test per physical outer-join operator for a single scenario.
   *
   * The hash-based tests are registered only when `ExtractEquiJoinKeys` yields a result for
   * `condition`; the two `BroadcastNestedLoopJoin` tests are always registered.
   *
   * @param testName prefix of the registered test names; the operator name is appended to it
   * @param leftRows left input of the join
   * @param rightRows right input of the join
   * @param joinType outer-join type handed to the physical operators
   * @param condition join condition; the hash joins receive the keys and condition produced by
   *                  `ExtractEquiJoinKeys`, the nested loop joins receive it unchanged
   * @param expectedAnswer expected output rows written as tuples; row order is not significant
   */
  private def testOuterJoin(
      testName: String,
      leftRows: DataFrame,
      rightRows: DataFrame,
      joinType: JoinType,
      condition: Expression,
      expectedAnswer: Seq[Product]): Unit = {
    // This logical join exists only so that ExtractEquiJoinKeys can take `condition` apart. The
    // join type it reports back is discarded below, so a fixed `Inner` is sufficient here.
    val logicalJoin = Join(leftRows.logicalPlan, rightRows.logicalPlan, Inner, Some(condition))

    ExtractEquiJoinKeys.unapply(logicalJoin).foreach {
      case (_, leftKeys, rightKeys, boundCondition, _, _) =>
        test(s"$testName using ShuffledHashOuterJoin") {
          checkAnswer2(leftRows, rightRows, (leftPlan: SparkPlan, rightPlan: SparkPlan) =>
            // NOTE(review): EnsureRequirements presumably adds whatever the operator needs from
            // its children (e.g. a shuffle) -- confirm against its definition.
            EnsureRequirements(leftPlan.sqlContext).apply(
              ShuffledHashOuterJoin(
                leftKeys, rightKeys, joinType, boundCondition, leftPlan, rightPlan)),
            expectedAnswer.map(Row.fromTuple),
            // Compare sorted answers: this test must not depend on the order in which the
            // operator happens to emit rows.
            sortAnswers = true)
        }

        // The broadcast hash variant is not exercised for full outer joins.
        if (joinType != FullOuter) {
          test(s"$testName using BroadcastHashOuterJoin") {
            checkAnswer2(leftRows, rightRows, (leftPlan: SparkPlan, rightPlan: SparkPlan) =>
              BroadcastHashOuterJoin(
                leftKeys, rightKeys, joinType, boundCondition, leftPlan, rightPlan),
              expectedAnswer.map(Row.fromTuple),
              // Same reasoning as above: only the set of rows matters, not their order.
              sortAnswers = true)
          }
        }
    }

    test(s"$testName using BroadcastNestedLoopJoin (build=left)") {
      checkAnswer2(leftRows, rightRows, (leftPlan: SparkPlan, rightPlan: SparkPlan) =>
        joins.BroadcastNestedLoopJoin(
          leftPlan, rightPlan, joins.BuildLeft, joinType, Some(condition)),
        expectedAnswer.map(Row.fromTuple),
        sortAnswers = true)
    }

    test(s"$testName using BroadcastNestedLoopJoin (build=right)") {
      checkAnswer2(leftRows, rightRows, (leftPlan: SparkPlan, rightPlan: SparkPlan) =>
        joins.BroadcastNestedLoopJoin(
          leftPlan, rightPlan, joins.BuildRight, joinType, Some(condition)),
        expectedAnswer.map(Row.fromTuple),
        sortAnswers = true)
    }
  }

  // Left input: columns (a: Int, b: Double), including one all-null row.
  val left = sqlContext.createDataFrame(sqlContext.sparkContext.parallelize(Seq(
    Row(1, 2.0),
    Row(2, 1.0),
    Row(3, 3.0),
    Row(null, null)
  )), new StructType().add("a", IntegerType).add("b", DoubleType))

  // Right input: columns (c: Int, d: Double), including one all-null row.
  val right = sqlContext.createDataFrame(sqlContext.sparkContext.parallelize(Seq(
    Row(2, 3.0),
    Row(3, 2.0),
    Row(4, 1.0),
    Row(null, null)
  )), new StructType().add("c", IntegerType).add("d", DoubleType))

  // Join condition: a = c AND b < d. With the data above only the a = 2 rows satisfy both parts;
  // the a = 3 rows agree on the key but fail b < d (3.0 < 2.0).
  val condition = {
    And(
      (left.col("a") === right.col("c")).expr,
      LessThan(left.col("b").expr, right.col("d").expr))
  }

  // --- Basic outer joins ------------------------------------------------------------------------

  testOuterJoin(
    "basic left outer join",
    left,
    right,
    LeftOuter,
    condition,
    Seq(
      (1, 2.0, null, null),
      (2, 1.0, 2, 3.0),
      (3, 3.0, null, null),
      (null, null, null, null)
    )
  )

  testOuterJoin(
    "basic right outer join",
    left,
    right,
    RightOuter,
    condition,
    Seq(
      (2, 1.0, 2, 3.0),
      (null, null, 3, 2.0),
      (null, null, 4, 1.0),
      (null, null, null, null)
    )
  )

  // The all-null rows of the two inputs do not match each other, so the full outer join is
  // expected to contain two all-null output rows (one per side).
  testOuterJoin(
    "basic full outer join",
    left,
    right,
    FullOuter,
    condition,
    Seq(
      (1, 2.0, null, null),
      (2, 1.0, 2, 3.0),
      (3, 3.0, null, null),
      (null, null, 3, 2.0),
      (null, null, 4, 1.0),
      (null, null, null, null),
      (null, null, null, null)
    )
  )

  // --- Both inputs empty ------------------------------------------------------------------------

  testOuterJoin(
    "left outer join with both inputs empty",
    left.filter("false"),
    right.filter("false"),
    LeftOuter,
    condition,
    Seq.empty
  )

  testOuterJoin(
    "right outer join with both inputs empty",
    left.filter("false"),
    right.filter("false"),
    RightOuter,
    condition,
    Seq.empty
  )

  testOuterJoin(
    "full outer join with both inputs empty",
    left.filter("false"),
    right.filter("false"),
    FullOuter,
    condition,
    Seq.empty
  )
}
|  | 1 | +/* | 
|  | 2 | + * Licensed to the Apache Software Foundation (ASF) under one or more | 
|  | 3 | + * contributor license agreements.  See the NOTICE file distributed with | 
|  | 4 | + * this work for additional information regarding copyright ownership. | 
|  | 5 | + * The ASF licenses this file to You under the Apache License, Version 2.0 | 
|  | 6 | + * (the "License"); you may not use this file except in compliance with | 
|  | 7 | + * the License.  You may obtain a copy of the License at | 
|  | 8 | + * | 
|  | 9 | + *    http://www.apache.org/licenses/LICENSE-2.0 | 
|  | 10 | + * | 
|  | 11 | + * Unless required by applicable law or agreed to in writing, software | 
|  | 12 | + * distributed under the License is distributed on an "AS IS" BASIS, | 
|  | 13 | + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | 
|  | 14 | + * See the License for the specific language governing permissions and | 
|  | 15 | + * limitations under the License. | 
|  | 16 | + */ | 
|  | 17 | + | 
|  | 18 | +package org.apache.spark.sql.execution.joins | 
|  | 19 | + | 
|  | 20 | +import org.apache.spark.sql.catalyst.planning.ExtractEquiJoinKeys | 
|  | 21 | +import org.apache.spark.sql.catalyst.plans.logical.Join | 
|  | 22 | +import org.apache.spark.sql.types.{IntegerType, DoubleType, StructType} | 
|  | 23 | +import org.apache.spark.sql.{DataFrame, Row} | 
|  | 24 | +import org.apache.spark.sql.catalyst.expressions._ | 
|  | 25 | +import org.apache.spark.sql.catalyst.plans._ | 
|  | 26 | +import org.apache.spark.sql.execution.{EnsureRequirements, joins, SparkPlan, SparkPlanTest} | 
|  | 27 | + | 
/**
 * Checks the physical outer-join operators used in this file (`ShuffledHashOuterJoin`,
 * `BroadcastHashOuterJoin` and `BroadcastNestedLoopJoin`) against small hand-written inputs.
 *
 * Each call to `testOuterJoin` registers one test per applicable operator, so every scenario
 * listed below is verified against all of the implementations that are exercised for it.
 */
class OuterJoinSuite extends SparkPlanTest {

  /**
   * Registers one test per physical outer-join operator for a single scenario.
   *
   * The hash-based tests are registered only when `ExtractEquiJoinKeys` yields a result for
   * `condition`; the two `BroadcastNestedLoopJoin` tests are always registered.
   *
   * @param testName prefix of the registered test names; the operator name is appended to it
   * @param leftRows left input of the join
   * @param rightRows right input of the join
   * @param joinType outer-join type handed to the physical operators
   * @param condition join condition; the hash joins receive the keys and condition produced by
   *                  `ExtractEquiJoinKeys`, the nested loop joins receive it unchanged
   * @param expectedAnswer expected output rows written as tuples; row order is not significant
   */
  private def testOuterJoin(
      testName: String,
      leftRows: DataFrame,
      rightRows: DataFrame,
      joinType: JoinType,
      condition: Expression,
      expectedAnswer: Seq[Product]): Unit = {
    // This logical join exists only so that ExtractEquiJoinKeys can take `condition` apart. The
    // join type it reports back is discarded below, so a fixed `Inner` is sufficient here.
    val logicalJoin = Join(leftRows.logicalPlan, rightRows.logicalPlan, Inner, Some(condition))

    ExtractEquiJoinKeys.unapply(logicalJoin).foreach {
      case (_, leftKeys, rightKeys, boundCondition, _, _) =>
        test(s"$testName using ShuffledHashOuterJoin") {
          checkAnswer2(leftRows, rightRows, (leftPlan: SparkPlan, rightPlan: SparkPlan) =>
            // NOTE(review): EnsureRequirements presumably adds whatever the operator needs from
            // its children (e.g. a shuffle) -- confirm against its definition.
            EnsureRequirements(leftPlan.sqlContext).apply(
              ShuffledHashOuterJoin(
                leftKeys, rightKeys, joinType, boundCondition, leftPlan, rightPlan)),
            expectedAnswer.map(Row.fromTuple),
            // Compare sorted answers: this test must not depend on the order in which the
            // operator happens to emit rows.
            sortAnswers = true)
        }

        // The broadcast hash variant is not exercised for full outer joins.
        if (joinType != FullOuter) {
          test(s"$testName using BroadcastHashOuterJoin") {
            checkAnswer2(leftRows, rightRows, (leftPlan: SparkPlan, rightPlan: SparkPlan) =>
              BroadcastHashOuterJoin(
                leftKeys, rightKeys, joinType, boundCondition, leftPlan, rightPlan),
              expectedAnswer.map(Row.fromTuple),
              // Same reasoning as above: only the set of rows matters, not their order.
              sortAnswers = true)
          }
        }
    }

    test(s"$testName using BroadcastNestedLoopJoin (build=left)") {
      checkAnswer2(leftRows, rightRows, (leftPlan: SparkPlan, rightPlan: SparkPlan) =>
        joins.BroadcastNestedLoopJoin(
          leftPlan, rightPlan, joins.BuildLeft, joinType, Some(condition)),
        expectedAnswer.map(Row.fromTuple),
        sortAnswers = true)
    }

    test(s"$testName using BroadcastNestedLoopJoin (build=right)") {
      checkAnswer2(leftRows, rightRows, (leftPlan: SparkPlan, rightPlan: SparkPlan) =>
        joins.BroadcastNestedLoopJoin(
          leftPlan, rightPlan, joins.BuildRight, joinType, Some(condition)),
        expectedAnswer.map(Row.fromTuple),
        sortAnswers = true)
    }
  }

  // Left input: columns (a: Int, b: Double), including one all-null row.
  val left = sqlContext.createDataFrame(sqlContext.sparkContext.parallelize(Seq(
    Row(1, 2.0),
    Row(2, 1.0),
    Row(3, 3.0),
    Row(null, null)
  )), new StructType().add("a", IntegerType).add("b", DoubleType))

  // Right input: columns (c: Int, d: Double), including one all-null row.
  val right = sqlContext.createDataFrame(sqlContext.sparkContext.parallelize(Seq(
    Row(2, 3.0),
    Row(3, 2.0),
    Row(4, 1.0),
    Row(null, null)
  )), new StructType().add("c", IntegerType).add("d", DoubleType))

  // Join condition: a = c AND b < d. With the data above only the a = 2 rows satisfy both parts;
  // the a = 3 rows agree on the key but fail b < d (3.0 < 2.0).
  val condition = {
    And(
      (left.col("a") === right.col("c")).expr,
      LessThan(left.col("b").expr, right.col("d").expr))
  }

  // --- Basic outer joins ------------------------------------------------------------------------

  testOuterJoin(
    "basic left outer join",
    left,
    right,
    LeftOuter,
    condition,
    Seq(
      (1, 2.0, null, null),
      (2, 1.0, 2, 3.0),
      (3, 3.0, null, null),
      (null, null, null, null)
    )
  )

  testOuterJoin(
    "basic right outer join",
    left,
    right,
    RightOuter,
    condition,
    Seq(
      (2, 1.0, 2, 3.0),
      (null, null, 3, 2.0),
      (null, null, 4, 1.0),
      (null, null, null, null)
    )
  )

  // The all-null rows of the two inputs do not match each other, so the full outer join is
  // expected to contain two all-null output rows (one per side).
  testOuterJoin(
    "basic full outer join",
    left,
    right,
    FullOuter,
    condition,
    Seq(
      (1, 2.0, null, null),
      (2, 1.0, 2, 3.0),
      (3, 3.0, null, null),
      (null, null, 3, 2.0),
      (null, null, 4, 1.0),
      (null, null, null, null),
      (null, null, null, null)
    )
  )

  // --- Both inputs empty ------------------------------------------------------------------------

  testOuterJoin(
    "left outer join with both inputs empty",
    left.filter("false"),
    right.filter("false"),
    LeftOuter,
    condition,
    Seq.empty
  )

  testOuterJoin(
    "right outer join with both inputs empty",
    left.filter("false"),
    right.filter("false"),
    RightOuter,
    condition,
    Seq.empty
  )

  testOuterJoin(
    "full outer join with both inputs empty",
    left.filter("false"),
    right.filter("false"),
    FullOuter,
    condition,
    Seq.empty
  )
}
0 commit comments