|
22 | 22 | import org.apache.spark.unsafe.types.UTF8String; |
23 | 23 |
|
24 | 24 | /** |
25 | | - * This class represents a column of values and provides the main APIs to access the data |
26 | | - * values. It supports all the types and contains get APIs as well as their batched versions. |
27 | | - * The batched versions are preferable whenever possible. |
| 25 | + * This class represents in-memory values of a column and provides the main APIs to access the data. |
| 26 | + * It supports all the types and contains get APIs as well as their batched versions. The batched |
| 27 | + * versions are considered to be faster and preferable whenever possible. |
28 | 28 | * |
29 | 29 | * To handle nested schemas, ColumnVector has two types: Arrays and Structs. In both cases these |
30 | | - * columns have child columns. All of the data is stored in the child columns and the parent column |
31 | | - * contains nullability, and in the case of Arrays, the lengths and offsets into the child column. |
32 | | - * Lengths and offsets are encoded identically to INTs. |
| 30 | + * columns have child columns. All of the data are stored in the child columns and the parent column |
| 31 | + * only contains nullability and, in the case of Arrays, the lengths and offsets into the child |
| 32 | + * column. Lengths and offsets are encoded identically to INTs. |
| 33 | + * |
33 | 34 | * Maps are just a special case of a two field struct. |
34 | 35 | * |
35 | 36 | * Most of the APIs take the rowId as a parameter. This is the batch local 0-based row id for values |
36 | | - * in the current RowBatch. |
37 | | - * |
38 | | - * A ColumnVector should be considered immutable once originally created. |
39 | | - * |
40 | | - * ColumnVectors are intended to be reused. |
| 37 | + * in the current batch. |
41 | 38 | */ |
42 | 39 | public abstract class ColumnVector implements AutoCloseable { |
| 40 | + |
43 | 41 | /** |
44 | 42 | * Returns the data type of this column. |
45 | 43 | */ |
46 | 44 | public final DataType dataType() { return type; } |
47 | 45 |
|
48 | 46 | /** |
49 | 47 | * Cleans up memory for this column. The column is not usable after this. |
50 | | - * TODO: this should probably have ref-counted semantics. |
51 | 48 | */ |
52 | 49 | public abstract void close(); |
53 | 50 |
|
@@ -107,13 +104,6 @@ public abstract class ColumnVector implements AutoCloseable { |
107 | 104 | */ |
108 | 105 | public abstract int[] getInts(int rowId, int count); |
109 | 106 |
|
110 | | - /** |
111 | | - * Returns the dictionary Id for rowId. |
112 | | - * This should only be called when the ColumnVector is dictionaryIds. |
113 | | - * We have this separate method for dictionaryIds as per SPARK-16928. |
114 | | - */ |
115 | | - public abstract int getDictId(int rowId); |
116 | | - |
117 | 107 | /** |
118 | 108 | * Returns the value for rowId. |
119 | 109 | */ |
@@ -145,39 +135,39 @@ public abstract class ColumnVector implements AutoCloseable { |
145 | 135 | public abstract double[] getDoubles(int rowId, int count); |
146 | 136 |
|
147 | 137 | /** |
148 | | - * Returns the length of the array at rowid. |
| 138 | + * Returns the length of the array for rowId. |
149 | 139 | */ |
150 | 140 | public abstract int getArrayLength(int rowId); |
151 | 141 |
|
152 | 142 | /** |
153 | | - * Returns the offset of the array at rowid. |
| 143 | + * Returns the offset of the array for rowId. |
154 | 144 | */ |
155 | 145 | public abstract int getArrayOffset(int rowId); |
156 | 146 |
|
157 | 147 | /** |
158 | | - * Returns a utility object to get structs. |
| 148 | + * Returns the struct for rowId. |
159 | 149 | */ |
160 | 150 | public final ColumnarRow getStruct(int rowId) { |
161 | 151 | return new ColumnarRow(this, rowId); |
162 | 152 | } |
163 | 153 |
|
164 | 154 | /** |
165 | | - * Returns a utility object to get structs. |
166 | | - * provided to keep API compatibility with InternalRow for code generation |
| 155 | + * A special version of {@link #getStruct(int)}, which is only used as an adapter for the Spark |
| 156 | + * codegen framework. The second parameter is ignored. |
167 | 157 | */ |
168 | 158 | public final ColumnarRow getStruct(int rowId, int size) { |
169 | 159 | return getStruct(rowId); |
170 | 160 | } |
171 | 161 |
|
172 | 162 | /** |
173 | | - * Returns the array at rowid. |
| 163 | + * Returns the array for rowId. |
174 | 164 | */ |
175 | 165 | public final ColumnarArray getArray(int rowId) { |
176 | 166 | return new ColumnarArray(arrayData(), getArrayOffset(rowId), getArrayLength(rowId)); |
177 | 167 | } |
178 | 168 |
|
179 | 169 | /** |
180 | | - * Returns the value for rowId. |
| 170 | + * Returns the map for rowId (passed here as {@code ordinal}). |
181 | 171 | */ |
182 | 172 | public MapData getMap(int ordinal) { |
183 | 173 | throw new UnsupportedOperationException(); |
@@ -214,30 +204,6 @@ public MapData getMap(int ordinal) { |
214 | 204 | */ |
215 | 205 | protected DataType type; |
216 | 206 |
|
217 | | - /** |
218 | | - * The Dictionary for this column. |
219 | | - * |
220 | | - * If it's not null, will be used to decode the value in getXXX(). |
221 | | - */ |
222 | | - protected Dictionary dictionary; |
223 | | - |
224 | | - /** |
225 | | - * Reusable column for ids of dictionary. |
226 | | - */ |
227 | | - protected ColumnVector dictionaryIds; |
228 | | - |
229 | | - /** |
230 | | - * Returns true if this column has a dictionary. |
231 | | - */ |
232 | | - public boolean hasDictionary() { return this.dictionary != null; } |
233 | | - |
234 | | - /** |
235 | | - * Returns the underlying integer column for ids of dictionary. |
236 | | - */ |
237 | | - public ColumnVector getDictionaryIds() { |
238 | | - return dictionaryIds; |
239 | | - } |
240 | | - |
241 | 207 | /** |
242 | 208 | * Sets up the common state and also handles creating the child columns if this is a nested |
243 | 209 | * type. |
|
0 commit comments