diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/RowFactory.java b/sql/catalyst/src/main/java/org/apache/spark/sql/RowFactory.java
new file mode 100644
index 0000000000000..62fcec824d09a
--- /dev/null
+++ b/sql/catalyst/src/main/java/org/apache/spark/sql/RowFactory.java
@@ -0,0 +1,34 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql;
+
+import org.apache.spark.sql.catalyst.expressions.GenericRow;
+
+/**
+ * A factory class used to construct {@link Row} objects.
+ */
+public class RowFactory {
+
+  /**
+   * Create a {@link Row} from an array of values. Position i in the array becomes position i
+   * in the created {@link Row} object.
+   */
+  public static Row create(Object[] values) {
+    return new GenericRow(values);
+  }
+}
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/Row.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/Row.scala
new file mode 100644
index 0000000000000..d7a4e014ce6a6
--- /dev/null
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/Row.scala
@@ -0,0 +1,240 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql
+
+import org.apache.spark.sql.catalyst.expressions.GenericRow
+
+
+object Row {
+  /**
+   * This method can be used to extract fields from a [[Row]] object in a pattern match. Example:
+   * {{{
+   * import org.apache.spark.sql._
+   *
+   * val pairs = sql("SELECT key, value FROM src").rdd.map {
+   *   case Row(key: Int, value: String) =>
+   *     key -> value
+   * }
+   * }}}
+   */
+  def unapplySeq(row: Row): Some[Seq[Any]] = Some(row)
+
+  /**
+   * This method can be used to construct a [[Row]] with the given values.
+   */
+  def apply(values: Any*): Row = new GenericRow(values.toArray)
+
+  /**
+   * This method can be used to construct a [[Row]] from a [[Seq]] of values.
+   */
+  def fromSeq(values: Seq[Any]): Row = new GenericRow(values.toArray)
+}
+
+
+/**
+ * Represents one row of output from a relational operator.
+ * Allows both generic access by ordinal, which will incur boxing overhead for primitives, and
+ * native primitive access.
+ *
+ * It is invalid to use the native primitive interface to retrieve a value that is null; instead,
+ * a user must check `isNullAt` before attempting to retrieve a value that might be null.
+ *
+ * To create a new Row, use [[RowFactory.create()]] in Java or [[Row.apply()]] in Scala.
+ *
+ * A [[Row]] object can be constructed by providing field values. Example:
+ * {{{
+ * import org.apache.spark.sql._
+ *
+ * // Create a Row from values.
+ * Row(value1, value2, value3, ...)
+ * // Create a Row from a Seq of values.
+ * Row.fromSeq(Seq(value1, value2, ...))
+ * }}}
+ *
+ * A value of a row can be accessed through both generic access by ordinal, which will incur
+ * boxing overhead for primitives, and native primitive access.
+ * An example of generic access by ordinal:
+ * {{{
+ * import org.apache.spark.sql._
+ *
+ * val row = Row(1, true, "a string", null)
+ * // row: Row = [1,true,a string,null]
+ * val firstValue = row(0)
+ * // firstValue: Any = 1
+ * val fourthValue = row(3)
+ * // fourthValue: Any = null
+ * }}}
+ *
+ * For native primitive access, it is invalid to use the native primitive interface to retrieve
+ * a value that is null; instead, a user must check `isNullAt` before attempting to retrieve a
+ * value that might be null.
+ * An example of native primitive access:
+ * {{{
+ * // using the row from the previous example.
+ * val firstValue = row.getInt(0)
+ * // firstValue: Int = 1
+ * val isNull = row.isNullAt(3)
+ * // isNull: Boolean = true
+ * }}}
+ *
+ * Interfaces related to native primitive access are:
+ *
+ * `isNullAt(i: Int): Boolean`
+ *
+ * `getInt(i: Int): Int`
+ *
+ * `getLong(i: Int): Long`
+ *
+ * `getDouble(i: Int): Double`
+ *
+ * `getFloat(i: Int): Float`
+ *
+ * `getBoolean(i: Int): Boolean`
+ *
+ * `getShort(i: Int): Short`
+ *
+ * `getByte(i: Int): Byte`
+ *
+ * `getString(i: Int): String`
+ *
+ * In Scala, fields in a [[Row]] object can be extracted in a pattern match. Example:
+ * {{{
+ * import org.apache.spark.sql._
+ *
+ * val pairs = sql("SELECT key, value FROM src").rdd.map {
+ *   case Row(key: Int, value: String) =>
+ *     key -> value
+ * }
+ * }}}
+ *
+ * @group row
+ */
+trait Row extends Seq[Any] with Serializable {
+  def apply(i: Int): Any
+
+  /** Returns the value at position i. If the value is null, null is returned. */
+  def get(i: Int): Any = apply(i)
+
+  /** Checks whether the value at position i is null. */
+  def isNullAt(i: Int): Boolean
+
+  /**
+   * Returns the value at position i as a primitive int.
+   * Throws an exception if the type mismatches or if the value is null.
+   */
+  def getInt(i: Int): Int
+
+  /**
+   * Returns the value at position i as a primitive long.
+   * Throws an exception if the type mismatches or if the value is null.
+   */
+  def getLong(i: Int): Long
+
+  /**
+   * Returns the value at position i as a primitive double.
+   * Throws an exception if the type mismatches or if the value is null.
+   */
+  def getDouble(i: Int): Double
+
+  /**
+   * Returns the value at position i as a primitive float.
+   * Throws an exception if the type mismatches or if the value is null.
+   */
+  def getFloat(i: Int): Float
+
+  /**
+   * Returns the value at position i as a primitive boolean.
+   * Throws an exception if the type mismatches or if the value is null.
+   */
+  def getBoolean(i: Int): Boolean
+
+  /**
+   * Returns the value at position i as a primitive short.
+   * Throws an exception if the type mismatches or if the value is null.
+   */
+  def getShort(i: Int): Short
+
+  /**
+   * Returns the value at position i as a primitive byte.
+   * Throws an exception if the type mismatches or if the value is null.
+   */
+  def getByte(i: Int): Byte
+
+  /**
+   * Returns the value at position i as a String object.
+   * Throws an exception if the type mismatches or if the value is null.
+   */
+  def getString(i: Int): String
+
+  /**
+   * Returns the value at position i of array type as a Scala Seq.
+   * Throws an exception if the type mismatches.
+   */
+  def getSeq[T](i: Int): Seq[T] = apply(i).asInstanceOf[Seq[T]]
+
+  /**
+   * Returns the value at position i of array type as a [[java.util.List]].
+   * Throws an exception if the type mismatches.
+   */
+  def getList[T](i: Int): java.util.List[T] = {
+    scala.collection.JavaConversions.seqAsJavaList(getSeq[T](i))
+  }
+
+  /**
+   * Returns the value at position i of map type as a Scala Map.
+   * Throws an exception if the type mismatches.
+   */
+  def getMap[K, V](i: Int): scala.collection.Map[K, V] = apply(i).asInstanceOf[Map[K, V]]
+
+  /**
+   * Returns the value at position i of map type as a [[java.util.Map]].
+   * Throws an exception if the type mismatches.
+   */
+  def getJavaMap[K, V](i: Int): java.util.Map[K, V] = {
+    scala.collection.JavaConversions.mapAsJavaMap(getMap[K, V](i))
+  }
+
+  /**
+   * Returns the value at position i of struct type as a [[Row]] object.
+   * Throws an exception if the type mismatches.
+   */
+  def getStruct(i: Int): Row = getAs[Row](i)
+
+  /**
+   * Returns the value at position i.
+   * Throws an exception if the type mismatches.
+   */
+  def getAs[T](i: Int): T = apply(i).asInstanceOf[T]
+
+  override def toString(): String = s"[${this.mkString(",")}]"
+
+  /**
+   * Makes a copy of the current [[Row]] object.
+   */
+  def copy(): Row
+
+  /** Returns true if there are any NULL values in this row. */
+  def anyNull: Boolean = {
+    val l = length
+    var i = 0
+    while (i < l) {
+      if (isNullAt(i)) { return true }
+      i += 1
+    }
+    false
+  }
+}
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/package.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/package.scala
index 55d95991c5f11..fbc97b2e75312 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/package.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/package.scala
@@ -49,6 +49,10 @@ package org.apache.spark.sql.catalyst
  */
 package object expressions {
 
+  type Row = org.apache.spark.sql.Row
+
+  val Row = org.apache.spark.sql.Row
+
   /**
    * Converts a [[Row]] to another Row given a sequence of expression that define each column of the
   * new row.
   * If the schema of the input row is specified, then the given expression will be bound
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Row.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/rows.scala
similarity index 68%
rename from sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Row.scala
rename to sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/rows.scala
index dcda53bb717a9..c22b8426841da 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Row.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/rows.scala
@@ -19,68 +19,6 @@ package org.apache.spark.sql.catalyst.expressions
 
 import org.apache.spark.sql.types.NativeType
 
-object Row {
-  /**
-   * This method can be used to extract fields from a [[Row]] object in a pattern match. Example:
-   * {{{
-   * import org.apache.spark.sql._
-   *
-   * val pairs = sql("SELECT key, value FROM src").rdd.map {
-   *   case Row(key: Int, value: String) =>
-   *     key -> value
-   * }
-   * }}}
-   */
-  def unapplySeq(row: Row): Some[Seq[Any]] = Some(row)
-
-  /**
-   * This method can be used to construct a [[Row]] with the given values.
-   */
-  def apply(values: Any*): Row = new GenericRow(values.toArray)
-
-  /**
-   * This method can be used to construct a [[Row]] from a [[Seq]] of values.
-   */
-  def fromSeq(values: Seq[Any]): Row = new GenericRow(values.toArray)
-}
-
-/**
- * Represents one row of output from a relational operator. Allows both generic access by ordinal,
- * which will incur boxing overhead for primitives, as well as native primitive access.
- *
- * It is invalid to use the native primitive interface to retrieve a value that is null, instead a
- * user must check [[isNullAt]] before attempting to retrieve a value that might be null.
- */
-trait Row extends Seq[Any] with Serializable {
-  def apply(i: Int): Any
-
-  def isNullAt(i: Int): Boolean
-
-  def getInt(i: Int): Int
-  def getLong(i: Int): Long
-  def getDouble(i: Int): Double
-  def getFloat(i: Int): Float
-  def getBoolean(i: Int): Boolean
-  def getShort(i: Int): Short
-  def getByte(i: Int): Byte
-  def getString(i: Int): String
-  def getAs[T](i: Int): T = apply(i).asInstanceOf[T]
-
-  override def toString() =
-    s"[${this.mkString(",")}]"
-
-  def copy(): Row
-
-  /** Returns true if there are any NULL values in this row. */
-  def anyNull: Boolean = {
-    var i = 0
-    while (i < length) {
-      if (isNullAt(i)) { return true }
-      i += 1
-    }
-    false
-  }
-}
 
 /**
  * An extended interface to [[Row]] that allows the values for each column to be updated. Setting
@@ -105,22 +43,19 @@
  * A row with no data. Calling any methods will result in an error. Can be used as a placeholder.
  */
 object EmptyRow extends Row {
-  def apply(i: Int): Any = throw new UnsupportedOperationException
-
-  def iterator = Iterator.empty
-  def length = 0
-  def isNullAt(i: Int): Boolean = throw new UnsupportedOperationException
-
-  def getInt(i: Int): Int = throw new UnsupportedOperationException
-  def getLong(i: Int): Long = throw new UnsupportedOperationException
-  def getDouble(i: Int): Double = throw new UnsupportedOperationException
-  def getFloat(i: Int): Float = throw new UnsupportedOperationException
-  def getBoolean(i: Int): Boolean = throw new UnsupportedOperationException
-  def getShort(i: Int): Short = throw new UnsupportedOperationException
-  def getByte(i: Int): Byte = throw new UnsupportedOperationException
-  def getString(i: Int): String = throw new UnsupportedOperationException
+  override def apply(i: Int): Any = throw new UnsupportedOperationException
+  override def iterator = Iterator.empty
+  override def length = 0
+  override def isNullAt(i: Int): Boolean = throw new UnsupportedOperationException
+  override def getInt(i: Int): Int = throw new UnsupportedOperationException
+  override def getLong(i: Int): Long = throw new UnsupportedOperationException
+  override def getDouble(i: Int): Double = throw new UnsupportedOperationException
+  override def getFloat(i: Int): Float = throw new UnsupportedOperationException
+  override def getBoolean(i: Int): Boolean = throw new UnsupportedOperationException
+  override def getShort(i: Int): Short = throw new UnsupportedOperationException
+  override def getByte(i: Int): Byte = throw new UnsupportedOperationException
+  override def getString(i: Int): String = throw new UnsupportedOperationException
   override def getAs[T](i: Int): T = throw new UnsupportedOperationException
-  def copy() = this
 }
@@ -135,50 +70,50 @@ class GenericRow(protected[sql] val values: Array[Any]) extends Row {
 
   def this(size: Int) = this(new Array[Any](size))
 
-  def iterator = values.iterator
+  override def iterator = values.iterator
 
-  def length = values.length
+  override def length = values.length
 
-  def apply(i: Int) = values(i)
+  override def apply(i: Int) = values(i)
 
-  def isNullAt(i: Int) = values(i) == null
+  override def isNullAt(i: Int) = values(i) == null
 
-  def getInt(i: Int): Int = {
+  override def getInt(i: Int): Int = {
     if (values(i) == null) sys.error("Failed to check null bit for primitive int value.")
     values(i).asInstanceOf[Int]
   }
 
-  def getLong(i: Int): Long = {
+  override def getLong(i: Int): Long = {
     if (values(i) == null) sys.error("Failed to check null bit for primitive long value.")
     values(i).asInstanceOf[Long]
   }
 
-  def getDouble(i: Int): Double = {
+  override def getDouble(i: Int): Double = {
     if (values(i) == null) sys.error("Failed to check null bit for primitive double value.")
     values(i).asInstanceOf[Double]
   }
 
-  def getFloat(i: Int): Float = {
+  override def getFloat(i: Int): Float = {
     if (values(i) == null) sys.error("Failed to check null bit for primitive float value.")
     values(i).asInstanceOf[Float]
   }
 
-  def getBoolean(i: Int): Boolean = {
+  override def getBoolean(i: Int): Boolean = {
     if (values(i) == null) sys.error("Failed to check null bit for primitive boolean value.")
     values(i).asInstanceOf[Boolean]
   }
 
-  def getShort(i: Int): Short = {
+  override def getShort(i: Int): Short = {
     if (values(i) == null) sys.error("Failed to check null bit for primitive short value.")
     values(i).asInstanceOf[Short]
   }
 
-  def getByte(i: Int): Byte = {
+  override def getByte(i: Int): Byte = {
byte value.") values(i).asInstanceOf[Byte] } - def getString(i: Int): String = { + override def getString(i: Int): String = { if (values(i) == null) sys.error("Failed to check null bit for primitive String value.") values(i).asInstanceOf[String] } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/dataTypes.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/dataTypes.scala index fa0a355ebc9b0..e38ad63f2e2c3 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/dataTypes.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/dataTypes.scala @@ -727,7 +727,7 @@ object StructType { * // StructType(List(StructField(b,LongType,false), StructField(c,BooleanType,false))) * }}} * - * A [[org.apache.spark.sql.catalyst.expressions.Row]] object is used as a value of the StructType. + * A [[org.apache.spark.sql.Row]] object is used as a value of the StructType. * Example: * {{{ * import org.apache.spark.sql._ diff --git a/sql/core/src/main/scala/org/apache/spark/sql/api/java/Row.scala b/sql/core/src/main/scala/org/apache/spark/sql/api/java/Row.scala index 207e2805fffe3..4faa79af2568a 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/api/java/Row.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/api/java/Row.scala @@ -23,7 +23,7 @@ import scala.collection.JavaConversions import scala.math.BigDecimal import org.apache.spark.api.java.JavaUtils.mapAsSerializableJavaMap -import org.apache.spark.sql.catalyst.expressions.{Row => ScalaRow} +import org.apache.spark.sql.{Row => ScalaRow} /** * A result row from a Spark SQL query. diff --git a/sql/core/src/main/scala/org/apache/spark/sql/package.scala b/sql/core/src/main/scala/org/apache/spark/sql/package.scala index b75266d5aa409..6dd39be807037 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/package.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/package.scala @@ -33,89 +33,6 @@ import org.apache.spark.sql.execution.SparkPlan */ package object sql { - /** - * :: DeveloperApi :: - * - * Represents one row of output from a relational operator. - * @group row - */ - @DeveloperApi - type Row = catalyst.expressions.Row - - /** - * :: DeveloperApi :: - * - * A [[Row]] object can be constructed by providing field values. Example: - * {{{ - * import org.apache.spark.sql._ - * - * // Create a Row from values. - * Row(value1, value2, value3, ...) - * // Create a Row from a Seq of values. - * Row.fromSeq(Seq(value1, value2, ...)) - * }}} - * - * A value of a row can be accessed through both generic access by ordinal, - * which will incur boxing overhead for primitives, as well as native primitive access. - * An example of generic access by ordinal: - * {{{ - * import org.apache.spark.sql._ - * - * val row = Row(1, true, "a string", null) - * // row: Row = [1,true,a string,null] - * val firstValue = row(0) - * // firstValue: Any = 1 - * val fourthValue = row(3) - * // fourthValue: Any = null - * }}} - * - * For native primitive access, it is invalid to use the native primitive interface to retrieve - * a value that is null, instead a user must check `isNullAt` before attempting to retrieve a - * value that might be null. - * An example of native primitive access: - * {{{ - * // using the row from the previous example. 
-   * val firstValue = row.getInt(0)
-   * // firstValue: Int = 1
-   * val isNull = row.isNullAt(3)
-   * // isNull: Boolean = true
-   * }}}
-   *
-   * Interfaces related to native primitive access are:
-   *
-   * `isNullAt(i: Int): Boolean`
-   *
-   * `getInt(i: Int): Int`
-   *
-   * `getLong(i: Int): Long`
-   *
-   * `getDouble(i: Int): Double`
-   *
-   * `getFloat(i: Int): Float`
-   *
-   * `getBoolean(i: Int): Boolean`
-   *
-   * `getShort(i: Int): Short`
-   *
-   * `getByte(i: Int): Byte`
-   *
-   * `getString(i: Int): String`
-   *
-   * Fields in a [[Row]] object can be extracted in a pattern match. Example:
-   * {{{
-   * import org.apache.spark.sql._
-   *
-   * val pairs = sql("SELECT key, value FROM src").rdd.map {
-   *   case Row(key: Int, value: String) =>
-   *     key -> value
-   * }
-   * }}}
-   *
-   * @group row
-   */
-  @DeveloperApi
-  val Row = catalyst.expressions.Row
-
   /**
    * Converts a logical plan into zero or more SparkPlans.
    */
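
Usage sketch: the snippet below exercises the public `Row` API this patch introduces, using only calls documented in the new `Row.scala` and `RowFactory.java` above (construction, generic access, null-checked primitive access, pattern matching). It is illustrative rather than part of the patch, and assumes Spark SQL is on the classpath.

```scala
import org.apache.spark.sql._

// Construct rows with the new public API. Java callers would use
// RowFactory.create(values) as the equivalent entry point.
val row = Row(1, true, "a string", null)
val sameRow = Row.fromSeq(Seq(1, true, "a string", null))

// Generic access by ordinal boxes primitives and may return null.
val first: Any = row(0)       // 1
val fourth: Any = row.get(3)  // null

// Native primitive access: check isNullAt before reading a slot that
// might be null, since getInt on a null value throws.
val firstInt: Int = row.getInt(0)   // 1
val fourthIsNull = row.isNullAt(3)  // true

// Pattern matching goes through Row.unapplySeq.
row match {
  case Row(key: Int, flag: Boolean, s: String, _) =>
    println(s"$key $flag $s")
}

// anyNull reports whether any slot in the row is null.
assert(row.anyNull)
```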
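
The complex-type accessors added to the trait (`getSeq`, `getList`, `getMap`, `getJavaMap`, `getStruct`, `getAs`) can be sketched the same way; the nested values below are hypothetical stand-ins for array-, map-, and struct-typed columns produced by a query.

```scala
import org.apache.spark.sql._

// A hypothetical row holding array, map, and struct values.
val nested = Row(Seq(1, 2, 3), Map("a" -> 1), Row("inner", 42))

val xs: Seq[Int] = nested.getSeq[Int](0)
val jxs: java.util.List[Int] = nested.getList[Int](0)  // Java-friendly view
val m: collection.Map[String, Int] = nested.getMap[String, Int](1)
val jm: java.util.Map[String, Int] = nested.getJavaMap[String, Int](1)
val inner: Row = nested.getStruct(2)
val answer: Int = inner.getAs[Int](1)  // 42
```

Note that `getList` and `getJavaMap` wrap the underlying Scala collections via `JavaConversions`, so they return views rather than copies.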