
Commit 4b325c7

[SPARK-5193][SQL] Tighten up HiveContext API
1. Removed the deprecated LocalHiveContext.
2. Made private[sql] fields protected[sql] so they don't show up in javadoc.
3. Added javadoc to refreshTable.
4. Added Experimental tag to analyze command.

Author: Reynold Xin <[email protected]>

Closes #4054 from rxin/hivecontext-api and squashes the following commits:

25cc00a [Reynold Xin] Add implicit conversion back.
cbca886 [Reynold Xin] [SPARK-5193][SQL] Tighten up HiveContext API
1 parent 6abc45e commit 4b325c7

1 file changed: 13 additions, 35 deletions

sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveContext.scala

@@ -17,7 +17,7 @@
 
 package org.apache.spark.sql.hive
 
-import java.io.{BufferedReader, File, InputStreamReader, PrintStream}
+import java.io.{BufferedReader, InputStreamReader, PrintStream}
 import java.sql.{Date, Timestamp}
 
 import scala.collection.JavaConversions._
@@ -33,6 +33,7 @@ import org.apache.hadoop.hive.ql.session.SessionState
 import org.apache.hadoop.hive.serde2.io.{DateWritable, TimestampWritable}
 
 import org.apache.spark.SparkContext
+import org.apache.spark.annotation.Experimental
 import org.apache.spark.sql._
 import org.apache.spark.sql.catalyst.ScalaReflection
 import org.apache.spark.sql.catalyst.analysis.{Analyzer, EliminateAnalysisOperators, OverrideCatalog, OverrideFunctionRegistry}
@@ -42,28 +43,6 @@ import org.apache.spark.sql.hive.execution.{HiveNativeCommand, DescribeHiveTable
 import org.apache.spark.sql.sources.DataSourceStrategy
 import org.apache.spark.sql.types._
 
-/**
- * DEPRECATED: Use HiveContext instead.
- */
-@deprecated("""
-  Use HiveContext instead. It will still create a local metastore if one is not specified.
-  However, note that the default directory is ./metastore_db, not ./metastore
-  """, "1.1")
-class LocalHiveContext(sc: SparkContext) extends HiveContext(sc) {
-
-  lazy val metastorePath = new File("metastore").getCanonicalPath
-  lazy val warehousePath: String = new File("warehouse").getCanonicalPath
-
-  /** Sets up the system initially or after a RESET command */
-  protected def configure() {
-    setConf("javax.jdo.option.ConnectionURL",
-      s"jdbc:derby:;databaseName=$metastorePath;create=true")
-    setConf("hive.metastore.warehouse.dir", warehousePath)
-  }
-
-  configure() // Must be called before initializing the catalog below.
-}
-
 /**
  * An instance of the Spark SQL execution engine that integrates with data stored in Hive.
  * Configuration for Hive is read from hive-site.xml on the classpath.
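The removed LocalHiveContext only pre-configured a local Derby metastore and warehouse directory; per its own deprecation message, a plain HiveContext creates a local metastore (under ./metastore_db) when none is configured. A minimal migration sketch, assuming a SparkContext named sc and illustrative paths (not part of this commit):

  import java.io.File
  import org.apache.spark.sql.hive.HiveContext

  val hive = new HiveContext(sc)  // creates ./metastore_db locally when no metastore is configured

  // Optional: pin the metastore and warehouse locations explicitly,
  // mirroring what the removed LocalHiveContext did.
  val metastorePath = new File("metastore").getCanonicalPath
  hive.setConf("javax.jdo.option.ConnectionURL",
    s"jdbc:derby:;databaseName=$metastorePath;create=true")
  hive.setConf("hive.metastore.warehouse.dir", new File("warehouse").getCanonicalPath)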
@@ -80,7 +59,7 @@ class HiveContext(sc: SparkContext) extends SQLContext(sc) {
    * are automatically converted to use the Spark SQL parquet table scan, instead of the Hive
    * SerDe.
    */
-  private[spark] def convertMetastoreParquet: Boolean =
+  protected[sql] def convertMetastoreParquet: Boolean =
     getConf("spark.sql.hive.convertMetastoreParquet", "true") == "true"
 
   override protected[sql] def executePlan(plan: LogicalPlan): this.QueryExecution =
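The convertMetastoreParquet accessor is now protected[sql], but the behavior stays user-controllable through the configuration key read in the hunk above. A sketch, assuming the hive context from the earlier example:

  // Default is "true" per the getConf call above; "false" falls back to the Hive SerDe scan.
  hive.setConf("spark.sql.hive.convertMetastoreParquet", "false")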
@@ -97,14 +76,6 @@ class HiveContext(sc: SparkContext) extends SQLContext(sc) {
     }
   }
 
-  @deprecated("hiveql() is deprecated as the sql function now parses using HiveQL by default. " +
-    s"The SQL dialect for parsing can be set using ${SQLConf.DIALECT}", "1.1")
-  def hiveql(hqlQuery: String): SchemaRDD = new SchemaRDD(this, HiveQl.parseSql(hqlQuery))
-
-  @deprecated("hql() is deprecated as the sql function now parses using HiveQL by default. " +
-    s"The SQL dialect for parsing can be set using ${SQLConf.DIALECT}", "1.1")
-  def hql(hqlQuery: String): SchemaRDD = hiveql(hqlQuery)
-
   /**
    * Creates a table using the schema of the given class.
    *
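Callers of the removed hql()/hiveql() switch to sql(), which the deprecation messages note already parses with HiveQL by default in HiveContext (the dialect stays selectable via the SQLConf.DIALECT setting). A sketch, assuming a hypothetical src table:

  val results = hive.sql("SELECT key, value FROM src")  // was: hive.hql("SELECT key, value FROM src")
  results.collect().foreach(println)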
@@ -116,6 +87,12 @@ class HiveContext(sc: SparkContext) extends SQLContext(sc) {
     catalog.createTable("default", tableName, ScalaReflection.attributesFor[A], allowExisting)
   }
 
+  /**
+   * Invalidate and refresh all the cached the metadata of the given table. For performance reasons,
+   * Spark SQL or the external data source library it uses might cache certain metadata about a
+   * table, such as the location of blocks. When those change outside of Spark SQL, users should
+   * call this function to invalidate the cache.
+   */
   def refreshTable(tableName: String): Unit = {
     // TODO: Database support...
     catalog.refreshTable("default", tableName)
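Per the new javadoc, refreshTable is for tables whose cached metadata (for example, block locations tracked by an external data source) changed outside Spark SQL. A usage sketch with a hypothetical table name; note the TODO above, only the "default" database is wired up at this point:

  // An external job rewrote the files behind "logs"; drop the stale cached metadata.
  hive.refreshTable("logs")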
@@ -133,6 +110,7 @@ class HiveContext(sc: SparkContext) extends SQLContext(sc) {
    * Right now, it only supports Hive tables and it only updates the size of a Hive table
    * in the Hive metastore.
    */
+  @Experimental
   def analyze(tableName: String) {
     val relation = EliminateAnalysisOperators(catalog.lookupRelation(Seq(tableName)))
 
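analyze, now explicitly @Experimental, is invoked directly on the context; per its doc above it currently only supports Hive tables and only updates a table's size in the Hive metastore. A sketch with the same hypothetical src table:

  hive.analyze("src")  // writes the table's computed size back to the Hive metastore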
@@ -289,7 +267,6 @@ class HiveContext(sc: SparkContext) extends SQLContext(sc) {
     results
   }
 
-
   /**
    * Execute the command using Hive and return the results as a sequence. Each element
    * in the sequence is one row.
@@ -345,7 +322,7 @@ class HiveContext(sc: SparkContext) extends SQLContext(sc) {
   }
 
   @transient
-  val hivePlanner = new SparkPlanner with HiveStrategies {
+  private val hivePlanner = new SparkPlanner with HiveStrategies {
     val hiveContext = self
 
     override def strategies: Seq[Strategy] = experimental.extraStrategies ++ Seq(
@@ -410,7 +387,8 @@ class HiveContext(sc: SparkContext) extends SQLContext(sc) {
   }
 }
 
-object HiveContext {
+
+private object HiveContext {
   protected val primitiveTypes =
     Seq(StringType, IntegerType, LongType, DoubleType, FloatType, BooleanType, ByteType,
       ShortType, DateType, TimestampType, BinaryType)
