18 | 18 | package org.apache.spark.sql.hive.execution |
19 | 19 |
20 | 20 | import org.apache.hadoop.hive.common.`type`.{HiveDecimal, HiveVarchar} |
| 21 | +import org.apache.hadoop.hive.conf.HiveConf |
21 | 22 | import org.apache.hadoop.hive.metastore.MetaStoreUtils |
22 | 23 | import org.apache.hadoop.hive.ql.Context |
23 | 24 | import org.apache.hadoop.hive.ql.metadata.{Partition => HivePartition, Hive} |
24 | 25 | import org.apache.hadoop.hive.ql.plan.{TableDesc, FileSinkDesc} |
25 | | -import org.apache.hadoop.hive.serde2.Serializer |
| 26 | +import org.apache.hadoop.hive.serde.serdeConstants |
26 | 27 | import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils.ObjectInspectorCopyOption |
27 | 28 | import org.apache.hadoop.hive.serde2.objectinspector._ |
28 | 29 | import org.apache.hadoop.hive.serde2.objectinspector.primitive.JavaHiveDecimalObjectInspector |
29 | 30 | import org.apache.hadoop.hive.serde2.objectinspector.primitive.JavaHiveVarcharObjectInspector |
| 31 | +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils |
| 32 | +import org.apache.hadoop.hive.serde2.{ColumnProjectionUtils, Serializer} |
30 | 33 | import org.apache.hadoop.io.Writable |
31 | 34 | import org.apache.hadoop.mapred._ |
32 | 35 |
@@ -119,6 +122,38 @@ case class HiveTableScan( |
119 | 122 | Cast(Literal(value), dataType).eval(null) |
120 | 123 | } |
121 | 124 |
| 125 | + private def addColumnMetadataToConf(hiveConf: HiveConf) { |
| 126 | + // Specifies IDs and internal names of columns to be scanned. |
| 127 | + val neededColumnIDs = attributes.map(a => relation.output.indexWhere(_.name == a.name): Integer) |
| 128 | + val columnInternalNames = neededColumnIDs.map(HiveConf.getColumnInternalName(_)).mkString(",") |
| 129 | + |
| 130 | + if (attributes.size == relation.output.size) { |
| 131 | + ColumnProjectionUtils.setFullyReadColumns(hiveConf) |
| 132 | + } else { |
| 133 | + ColumnProjectionUtils.appendReadColumnIDs(hiveConf, neededColumnIDs) |
| 134 | + } |
| 135 | + |
| 136 | + ColumnProjectionUtils.appendReadColumnNames(hiveConf, attributes.map(_.name)) |
| 137 | + |
| 138 | + // Specifies types and object inspectors of columns to be scanned. |
| 139 | + val structOI = ObjectInspectorUtils |
| 140 | + .getStandardObjectInspector( |
| 141 | + relation.tableDesc.getDeserializer.getObjectInspector, |
| 142 | + ObjectInspectorCopyOption.JAVA) |
| 143 | + .asInstanceOf[StructObjectInspector] |
| 144 | + |
| 145 | + val columnTypeNames = structOI |
| 146 | + .getAllStructFieldRefs |
| 147 | + .map(_.getFieldObjectInspector) |
| 148 | + .map(TypeInfoUtils.getTypeInfoFromObjectInspector(_).getTypeName) |
| 149 | + .mkString(",") |
| 150 | + |
| 151 | + hiveConf.set(serdeConstants.LIST_COLUMN_TYPES, columnTypeNames) |
| 152 | + hiveConf.set(serdeConstants.LIST_COLUMNS, columnInternalNames) |
| 153 | + } |
| 154 | + |
| 155 | + addColumnMetadataToConf(sc.hiveconf) |
| 156 | + |
122 | 157 | @transient |
123 | 158 | def inputRdd = if (!relation.hiveQlTable.isPartitioned) { |
124 | 159 | hadoopReader.makeRDDForTable(relation.hiveQlTable) |
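
Aside (not part of the diff): the new `addColumnMetadataToConf` records which columns the scan actually needs in the `HiveConf`, as column IDs, internal names, and type names. Below is a minimal, standalone Scala sketch of that bookkeeping, assuming a hypothetical three-column table of which only columns 0 and 2 are read; the object name and the type string are made up for illustration.

```scala
import java.util.{Arrays => JArrays}

import org.apache.hadoop.hive.conf.HiveConf
import org.apache.hadoop.hive.serde.serdeConstants
import org.apache.hadoop.hive.serde2.ColumnProjectionUtils

// Standalone illustration, not Spark code: prune a hypothetical three-column
// table down to columns 0 and 2.
object ColumnPruningSketch extends App {
  val conf = new HiveConf()

  // Register the IDs of the needed columns; column-oriented input formats
  // (e.g. RCFile) read them back via ColumnProjectionUtils.getReadColumnIDs
  // and skip the remaining columns on disk.
  ColumnProjectionUtils.appendReadColumnIDs(
    conf, JArrays.asList(Integer.valueOf(0), Integer.valueOf(2)))

  // Register internal names ("_col0", "_col2") and type names for the SerDe,
  // mirroring the LIST_COLUMNS / LIST_COLUMN_TYPES settings in the diff above.
  val internalNames = Seq(0, 2).map(i => HiveConf.getColumnInternalName(i)).mkString(",")
  conf.set(serdeConstants.LIST_COLUMNS, internalNames)
  conf.set(serdeConstants.LIST_COLUMN_TYPES, "int,string")  // types assumed for the example

  println(ColumnProjectionUtils.getReadColumnIDs(conf))  // [0, 2]
}
```

Readers that respect `ColumnProjectionUtils` can then avoid deserializing the pruned columns, which is what the conf settings added in this hunk enable.
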
@@ -156,18 +191,19 @@ case class HiveTableScan( |
156 | 191 | } else { |
157 | 192 | val mutableRow = new GenericMutableRow(attributes.length) |
158 | 193 | val buffered = iterator.buffered |
159 | | - |
160 | | - (buffered.head match { |
| 194 | + val rowsAndPartitionKeys = buffered.head match { |
161 | 195 | case Array(_, _) => |
162 | 196 | buffered.map { case Array(deserializedRow, partitionKeys: Array[String]) => |
163 | 197 | (deserializedRow, partitionKeys) |
164 | 198 | } |
165 | 199 |
166 | 200 | case _ => |
167 | | - buffered.map { deserializedRow => |
168 | | - (deserializedRow, Array.empty[String]) |
| 201 | + buffered.map { |
| 202 | + (_, Array.empty[String]) |
169 | 203 | } |
170 | | - }).map { case (deserializedRow, partitionKeys: Array[String]) => |
| 204 | + } |
| 205 | + |
| 206 | + rowsAndPartitionKeys.map { case (deserializedRow, partitionKeys) => |
171 | 207 | var i = 0 |
172 | 208 |
173 | 209 | while (i < attributes.length) { |
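
Aside (not part of the diff): the refactored branch above relies on `BufferedIterator.head`, which peeks at the first element to decide whether rows carry partition keys without consuming that element, so the subsequent `map` still covers every row. A self-contained sketch of the pattern, with made-up data standing in for the deserialized rows:

```scala
// Illustrative data only: each element is either Array(row, partitionKeys)
// for a partitioned table, or a bare row for an unpartitioned one.
object BufferedHeadSketch extends App {
  val partitioned: Iterator[Any] =
    Iterator(Array("row1", Array("2014", "01")), Array("row2", Array("2014", "02")))

  val buffered = partitioned.buffered

  // Peek at the head to choose a decoding strategy once; head is not consumed,
  // so the map below still sees every element, including the first.
  val rowsAndPartitionKeys = buffered.head match {
    case Array(_, _) =>
      buffered.map { case Array(row, keys: Array[String]) => (row, keys) }
    case _ =>
      buffered.map { (_, Array.empty[String]) }
  }

  rowsAndPartitionKeys.foreach { case (row, keys) =>
    println(s"$row -> partition keys: ${keys.mkString("/")}")
  }
}
```
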