
Commit cc4e104

Author: Debasish Das
Merge branch 'similarity' of https://github.com/debasish83/spark into similarity
2 parents 71f24a4 + 49549d5

File tree: 17 files changed (+330, -66 lines)


core/src/main/scala/org/apache/spark/Aggregator.scala

Lines changed: 1 addition & 4 deletions
@@ -88,10 +88,7 @@ case class Aggregator[K, V, C] (
       combiners.iterator
     } else {
       val combiners = new ExternalAppendOnlyMap[K, C, C](identity, mergeCombiners, mergeCombiners)
-      while (iter.hasNext) {
-        val pair = iter.next()
-        combiners.insert(pair._1, pair._2)
-      }
+      combiners.insertAll(iter)
       // Update task metrics if context is not null
       // TODO: Make context non-optional in a future release
       Option(context).foreach { c =>

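The removed while-loop and the new call do the same thing for this code path: insertAll feeds every (key, value) pair from the iterator into the map in a single call. A minimal sketch of that equivalence (the helper below is illustrative, not Spark code; `combiners` and `iter` are the names from the hunk):

    // What the old loop did by hand: push each pair from the iterator into the map.
    // ExternalAppendOnlyMap.insertAll now performs this in one call.
    def insertAllEquivalent[K, C](insert: (K, C) => Unit, iter: Iterator[(K, C)]): Unit =
      iter.foreach { case (k, c) => insert(k, c) }
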
core/src/main/scala/org/apache/spark/util/SizeEstimator.scala

Lines changed: 51 additions & 10 deletions
@@ -47,6 +47,11 @@ private[spark] object SizeEstimator extends Logging {
   private val FLOAT_SIZE = 4
   private val DOUBLE_SIZE = 8
 
+  // Fields can be primitive types, whose sizes are 1, 2, 4 or 8 bytes, or pointers. The size of
+  // a pointer is 4 or 8 depending on the JVM (32-bit or 64-bit) and the UseCompressedOops flag.
+  // The sizes should be in descending order, as we will use that information for field placement.
+  private val fieldSizes = List(8, 4, 2, 1)
+
   // Alignment boundary for objects
   // TODO: Is this arch dependent ?
   private val ALIGN_SIZE = 8
@@ -171,7 +176,7 @@ private[spark] object SizeEstimator extends Logging {
       // general all ClassLoaders and Classes will be shared between objects anyway.
     } else {
       val classInfo = getClassInfo(cls)
-      state.size += classInfo.shellSize
+      state.size += alignSize(classInfo.shellSize)
       for (field <- classInfo.pointerFields) {
         state.enqueue(field.get(obj))
       }
@@ -237,8 +242,8 @@ private[spark] object SizeEstimator extends Logging {
     }
     size
   }
-
-  private def primitiveSize(cls: Class[_]): Long = {
+
+  private def primitiveSize(cls: Class[_]): Int = {
     if (cls == classOf[Byte]) {
       BYTE_SIZE
     } else if (cls == classOf[Boolean]) {
@@ -274,30 +279,66 @@ private[spark] object SizeEstimator extends Logging {
     val parent = getClassInfo(cls.getSuperclass)
     var shellSize = parent.shellSize
     var pointerFields = parent.pointerFields
+    val sizeCount = Array.fill(fieldSizes.max + 1)(0)
 
+    // Iterate through the fields of this class and gather information.
     for (field <- cls.getDeclaredFields) {
       if (!Modifier.isStatic(field.getModifiers)) {
         val fieldClass = field.getType
         if (fieldClass.isPrimitive) {
-          shellSize += primitiveSize(fieldClass)
+          sizeCount(primitiveSize(fieldClass)) += 1
         } else {
           field.setAccessible(true) // Enable future get()'s on this field
-          shellSize += pointerSize
+          sizeCount(pointerSize) += 1
           pointerFields = field :: pointerFields
         }
       }
     }
 
-    shellSize = alignSize(shellSize)
+    // Based on the simulated field layout code in Aleksey Shipilev's report:
+    // http://cr.openjdk.java.net/~shade/papers/2013-shipilev-fieldlayout-latest.pdf
+    // The code is in Figure 9. The simplified idea of field layout consists of 4 parts
+    // (see the report for more details):
+    //
+    // 1. Field alignment: HotSpot lays out the fields aligned by their size.
+    // 2. Object alignment: HotSpot rounds instance size up to 8 bytes.
+    // 3. Consistent field layout throughout the hierarchy: we lay out the superclass first and
+    //    use the superclass's shellSize as the starting point for the fields of this class.
+    // 4. Class alignment: HotSpot rounds field blocks up to HeapOopSize, not 4 bytes; confirmed
+    //    with Aleksey, see https://bugs.openjdk.java.net/browse/CODETOOLS-7901322
+    //
+    // The real-world field layout is much more complicated: there are three kinds of field
+    // ordering in Java 8, and we don't consider the @Contended annotation introduced by Java 8.
+    // See the layout_fields method in the HotSpot class-file parser for more details:
+    // hg.openjdk.java.net/jdk8/jdk8/hotspot/file/tip/src/share/vm/classfile/classFileParser.cpp
+    var alignedSize = shellSize
+    for (size <- fieldSizes if sizeCount(size) > 0) {
+      val count = sizeCount(size)
+      // If there are internal gaps, smaller fields can fit in.
+      alignedSize = math.max(alignedSize, alignSizeUp(shellSize, size) + size * count)
+      shellSize += size * count
+    }
+
+    // Choose the larger value as the new shellSize (clearly alignedSize >= shellSize), and
+    // round the instance field block up to pointer size.
+    shellSize = alignSizeUp(alignedSize, pointerSize)
 
     // Create and cache a new ClassInfo
     val newInfo = new ClassInfo(shellSize, pointerFields)
     classInfos.put(cls, newInfo)
     newInfo
   }
 
-  private def alignSize(size: Long): Long = {
-    val rem = size % ALIGN_SIZE
-    if (rem == 0) size else (size + ALIGN_SIZE - rem)
-  }
+  private def alignSize(size: Long): Long = alignSizeUp(size, ALIGN_SIZE)
+
+  /**
+   * Compute the aligned size. alignSize must be 2^n, otherwise the result will be wrong.
+   * When alignSize = 2^n, alignSize - 1 = 2^n - 1, whose binary representation has exactly
+   * n trailing 1s (0b00...011..1), so ~(alignSize - 1) is 0b11..100..0. Hence,
+   * (size + alignSize - 1) & ~(alignSize - 1) clears the last n bits, which yields a
+   * multiple of alignSize.
+   */
+  private def alignSizeUp(size: Long, alignSize: Int): Long =
+    (size + alignSize - 1) & ~(alignSize - 1)
 }

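The bit trick in alignSizeUp can be checked in isolation. A small self-contained sketch, Scala REPL style (the sample sizes are illustrative):

    // Same expression as the new helper; works whenever alignSize is a power of two.
    def alignSizeUp(size: Long, alignSize: Int): Long =
      (size + alignSize - 1) & ~(alignSize - 1)

    alignSizeUp(12, 8)  // 16: rounded up to the 8-byte object-alignment boundary
    alignSizeUp(16, 8)  // 16: already aligned, unchanged
    alignSizeUp(13, 4)  // 16: a 13-byte field block rounded up to HeapOopSize = 4
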
core/src/test/scala/org/apache/spark/util/SizeEstimatorSuite.scala

Lines changed: 44 additions & 3 deletions
@@ -36,6 +36,15 @@ class DummyClass4(val d: DummyClass3) {
   val x: Int = 0
 }
 
+// dummy class to show class field blocks alignment.
+class DummyClass5 extends DummyClass1 {
+  val x: Boolean = true
+}
+
+class DummyClass6 extends DummyClass5 {
+  val y: Boolean = true
+}
+
 object DummyString {
   def apply(str: String) : DummyString = new DummyString(str.toArray)
 }
@@ -50,6 +59,7 @@ class SizeEstimatorSuite
 
   override def beforeEach() {
     // Set the arch to 64-bit and compressedOops to true to get a deterministic test-case
+    super.beforeEach()
     System.setProperty("os.arch", "amd64")
     System.setProperty("spark.test.useCompressedOops", "true")
   }
@@ -62,6 +72,22 @@ class SizeEstimatorSuite
     assertResult(48)(SizeEstimator.estimate(new DummyClass4(new DummyClass3)))
   }
 
+  test("primitive wrapper objects") {
+    assertResult(16)(SizeEstimator.estimate(new java.lang.Boolean(true)))
+    assertResult(16)(SizeEstimator.estimate(new java.lang.Byte("1")))
+    assertResult(16)(SizeEstimator.estimate(new java.lang.Character('1')))
+    assertResult(16)(SizeEstimator.estimate(new java.lang.Short("1")))
+    assertResult(16)(SizeEstimator.estimate(new java.lang.Integer(1)))
+    assertResult(24)(SizeEstimator.estimate(new java.lang.Long(1)))
+    assertResult(16)(SizeEstimator.estimate(new java.lang.Float(1.0)))
+    assertResult(24)(SizeEstimator.estimate(new java.lang.Double(1.0d)))
+  }
+
+  test("class field blocks rounding") {
+    assertResult(16)(SizeEstimator.estimate(new DummyClass5))
+    assertResult(24)(SizeEstimator.estimate(new DummyClass6))
+  }
+
   // NOTE: The String class definition varies across JDK versions (1.6 vs. 1.7) and vendors
   // (Sun vs IBM). Use a DummyString class to make tests deterministic.
   test("strings") {
@@ -102,18 +128,18 @@ class SizeEstimatorSuite
     val arr = new Array[Char](100000)
     assertResult(200016)(SizeEstimator.estimate(arr))
     assertResult(480032)(SizeEstimator.estimate(Array.fill(10000)(new DummyString(arr))))
-
+
     val buf = new ArrayBuffer[DummyString]()
     for (i <- 0 until 5000) {
       buf.append(new DummyString(new Array[Char](10)))
     }
     assertResult(340016)(SizeEstimator.estimate(buf.toArray))
-
+
     for (i <- 0 until 5000) {
       buf.append(new DummyString(arr))
    }
     assertResult(683912)(SizeEstimator.estimate(buf.toArray))
-
+
     // If an array contains the *same* element many times, we should only count it once.
     val d1 = new DummyClass1
     // 10 pointers plus 8-byte object
@@ -155,5 +181,20 @@ class SizeEstimatorSuite
     assertResult(64)(SizeEstimator.estimate(DummyString("a")))
     assertResult(64)(SizeEstimator.estimate(DummyString("ab")))
     assertResult(72)(SizeEstimator.estimate(DummyString("abcdefgh")))
+
+    // primitive wrapper classes
+    assertResult(24)(SizeEstimator.estimate(new java.lang.Boolean(true)))
+    assertResult(24)(SizeEstimator.estimate(new java.lang.Byte("1")))
+    assertResult(24)(SizeEstimator.estimate(new java.lang.Character('1')))
+    assertResult(24)(SizeEstimator.estimate(new java.lang.Short("1")))
+    assertResult(24)(SizeEstimator.estimate(new java.lang.Integer(1)))
+    assertResult(24)(SizeEstimator.estimate(new java.lang.Long(1)))
+    assertResult(24)(SizeEstimator.estimate(new java.lang.Float(1.0)))
+    assertResult(24)(SizeEstimator.estimate(new java.lang.Double(1.0d)))
+  }
+
+  test("class field blocks rounding on 64-bit VM without useCompressedOops") {
+    assertResult(24)(SizeEstimator.estimate(new DummyClass5))
+    assertResult(32)(SizeEstimator.estimate(new DummyClass6))
   }
 }

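Assuming the usual compressed-oops constants on a 64-bit HotSpot VM (12-byte object header, 4-byte HeapOopSize, 8-byte object alignment), the expected values for the two new dummy classes can be reproduced by hand. This is a hedged walkthrough in REPL style, not part of the test code:

    def alignUp(size: Long, align: Int): Long = (size + align - 1) & ~(align - 1)

    // DummyClass1: header only -> 12-byte field block, object rounded up to 16
    alignUp(12, 8)                              // 16
    // DummyClass5: DummyClass1 block (12) + one Boolean -> 13, rounded to HeapOopSize, then to 8
    alignUp(alignUp(12 + 1, 4), 8)              // 16
    // DummyClass6: DummyClass5 block (16) + one Boolean -> 17 -> 20, rounded to 8
    alignUp(alignUp(16 + 1, 4), 8)              // 24
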
core/src/test/scala/org/apache/spark/util/collection/ExternalSorterSuite.scala

Lines changed: 5 additions & 5 deletions
@@ -377,17 +377,17 @@ class ExternalSorterSuite extends FunSuite with LocalSparkContext with PrivateMe
     val sorter = new ExternalSorter[Int, Int, Int](
       None, Some(new HashPartitioner(3)), Some(ord), None)
     assertDidNotBypassMergeSort(sorter)
-    sorter.insertAll((0 until 100000).iterator.map(i => (i, i)))
+    sorter.insertAll((0 until 120000).iterator.map(i => (i, i)))
     assert(diskBlockManager.getAllFiles().length > 0)
     sorter.stop()
     assert(diskBlockManager.getAllBlocks().length === 0)
 
     val sorter2 = new ExternalSorter[Int, Int, Int](
       None, Some(new HashPartitioner(3)), Some(ord), None)
     assertDidNotBypassMergeSort(sorter2)
-    sorter2.insertAll((0 until 100000).iterator.map(i => (i, i)))
+    sorter2.insertAll((0 until 120000).iterator.map(i => (i, i)))
     assert(diskBlockManager.getAllFiles().length > 0)
-    assert(sorter2.iterator.toSet === (0 until 100000).map(i => (i, i)).toSet)
+    assert(sorter2.iterator.toSet === (0 until 120000).map(i => (i, i)).toSet)
     sorter2.stop()
     assert(diskBlockManager.getAllBlocks().length === 0)
   }
@@ -428,8 +428,8 @@ class ExternalSorterSuite extends FunSuite with LocalSparkContext with PrivateMe
       None, Some(new HashPartitioner(3)), Some(ord), None)
     assertDidNotBypassMergeSort(sorter)
     intercept[SparkException] {
-      sorter.insertAll((0 until 100000).iterator.map(i => {
-        if (i == 99990) {
+      sorter.insertAll((0 until 120000).iterator.map(i => {
+        if (i == 119990) {
           throw new SparkException("Intentional failure")
         }
         (i, i)

docs/configuration.md

Lines changed: 10 additions & 0 deletions
@@ -1464,6 +1464,16 @@ Apart from these, the following properties are also available, and may be useful
   for more details.
   </td>
 </tr>
+<tr>
+  <td><code>spark.streaming.kafka.maxRetries</code></td>
+  <td>1</td>
+  <td>
+    Maximum number of consecutive retries the driver will make in order to find
+    the latest offsets on the leader of each partition (a default value of 1
+    means that the driver will make a maximum of 2 attempts). Only applies to
+    the new Kafka direct stream API.
+  </td>
+</tr>
 </table>
 
 #### Cluster Managers

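The new property can be set like any other Spark configuration entry. A hedged usage sketch (the retry count of 3 and the application name are illustrative):

    import org.apache.spark.SparkConf

    val conf = new SparkConf()
      .setAppName("kafka-direct-example")               // illustrative
      .set("spark.streaming.kafka.maxRetries", "3")     // 3 retries = at most 4 attempts
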
launcher/src/main/java/org/apache/spark/launcher/SparkSubmitCommandBuilder.java

Lines changed: 18 additions & 1 deletion
@@ -190,6 +190,10 @@ private List<String> buildSparkSubmitCommand(Map<String, String> env) throws IOE
       firstNonEmptyValue(SparkLauncher.DRIVER_EXTRA_CLASSPATH, conf, props) : null;
 
     List<String> cmd = buildJavaCommand(extraClassPath);
+    // Take Thrift Server as daemon
+    if (isThriftServer(mainClass)) {
+      addOptionString(cmd, System.getenv("SPARK_DAEMON_JAVA_OPTS"));
+    }
     addOptionString(cmd, System.getenv("SPARK_SUBMIT_OPTS"));
     addOptionString(cmd, System.getenv("SPARK_JAVA_OPTS"));
 
@@ -201,7 +205,11 @@ private List<String> buildSparkSubmitCommand(Map<String, String> env) throws IOE
     // - SPARK_DRIVER_MEMORY env variable
     // - SPARK_MEM env variable
     // - default value (512m)
-    String memory = firstNonEmpty(firstNonEmptyValue(SparkLauncher.DRIVER_MEMORY, conf, props),
+    // Take Thrift Server as daemon
+    String tsMemory =
+      isThriftServer(mainClass) ? System.getenv("SPARK_DAEMON_MEMORY") : null;
+    String memory = firstNonEmpty(tsMemory,
+      firstNonEmptyValue(SparkLauncher.DRIVER_MEMORY, conf, props),
       System.getenv("SPARK_DRIVER_MEMORY"), System.getenv("SPARK_MEM"), DEFAULT_MEM);
     cmd.add("-Xms" + memory);
     cmd.add("-Xmx" + memory);
@@ -292,6 +300,15 @@ private boolean isClientMode(Properties userProps) {
       (!userMaster.equals("yarn-cluster") && deployMode == null);
   }
 
+  /**
+   * Return whether the given main class represents a thrift server.
+   */
+  private boolean isThriftServer(String mainClass) {
+    return (mainClass != null &&
+      mainClass.equals("org.apache.spark.sql.hive.thriftserver.HiveThriftServer2"));
+  }
+
+
   private class OptionParser extends SparkSubmitOptionParser {
 
     @Override

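In Scala terms, the driver-memory resolution order introduced above looks roughly like the sketch below. This is illustrative only: the real logic is the Java code in SparkSubmitCommandBuilder, and `confOrProps` stands in for the SparkLauncher.DRIVER_MEMORY lookup from conf/props:

    // First non-empty value wins; SPARK_DAEMON_MEMORY is consulted only for the Thrift Server.
    def driverMemory(isThriftServer: Boolean, confOrProps: Option[String]): String =
      Seq(
        if (isThriftServer) Option(System.getenv("SPARK_DAEMON_MEMORY")) else None,
        confOrProps,
        Option(System.getenv("SPARK_DRIVER_MEMORY")),
        Option(System.getenv("SPARK_MEM")))
        .flatten
        .find(_.nonEmpty)
        .getOrElse("512m")                               // DEFAULT_MEM
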
python/pyspark/sql/dataframe.py

Lines changed: 1 addition & 1 deletion
@@ -1,6 +1,6 @@
 #
 # Licensed to the Apache Software Foundation (ASF) under one or more
-# contir[butor license agreements. See the NOTICE file distributed with
+# contributor license agreements. See the NOTICE file distributed with
 # this work for additional information regarding copyright ownership.
 # The ASF licenses this file to You under the Apache License, Version 2.0
 # (the "License"); you may not use this file except in compliance with

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/AbstractSparkSQLParser.scala

Lines changed: 5 additions & 6 deletions
@@ -25,10 +25,6 @@ import scala.util.parsing.input.CharArrayReader.EofCh
 
 import org.apache.spark.sql.catalyst.plans.logical._
 
-private[sql] object KeywordNormalizer {
-  def apply(str: String): String = str.toLowerCase()
-}
-
 private[sql] abstract class AbstractSparkSQLParser
   extends StandardTokenParsers with PackratParsers {
 
@@ -42,7 +38,7 @@ private[sql] abstract class AbstractSparkSQLParser
   }
 
   protected case class Keyword(str: String) {
-    def normalize: String = KeywordNormalizer(str)
+    def normalize: String = lexical.normalizeKeyword(str)
     def parser: Parser[String] = normalize
   }
 
@@ -90,13 +86,16 @@ class SqlLexical extends StdLexical {
     reserved ++= keywords
   }
 
+  /* Normalize the keyword string. */
+  def normalizeKeyword(str: String): String = str.toLowerCase
+
   delimiters += (
     "@", "*", "+", "-", "<", "=", "<>", "!=", "<=", ">=", ">", "/", "(", ")",
     ",", ";", "%", "{", "}", ":", "[", "]", ".", "&", "|", "^", "~", "<=>"
   )
 
   protected override def processIdent(name: String) = {
-    val token = KeywordNormalizer(name)
+    val token = normalizeKeyword(name)
     if (reserved contains token) Keyword(token) else Identifier(name)
   }
 
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/Dialect.scala

Lines changed: 33 additions & 0 deletions

@@ -0,0 +1,33 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.catalyst
+
+import org.apache.spark.annotation.DeveloperApi
+import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
+
+/**
+ * Root class of SQL parser dialects. We don't guarantee binary
+ * compatibility for future releases, so keep it as an internal
+ * interface for advanced users.
+ *
+ */
+@DeveloperApi
+abstract class Dialect {
+  // The main function, to be implemented by the SQL parser.
+  def parse(sqlText: String): LogicalPlan
+}

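A hedged sketch of what a user-defined dialect could look like against this interface. `MyDialect` and the `delegate` parser it wraps are hypothetical; the DialectException used to surface parse failures is the one added to catalyst's errors package in the hunk below:

    import org.apache.spark.sql.catalyst.Dialect
    import org.apache.spark.sql.catalyst.errors.DialectException
    import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan

    // Hypothetical dialect that delegates to whatever parser the implementor supplies
    // and wraps failures in the DialectException introduced by this commit.
    class MyDialect(delegate: String => LogicalPlan) extends Dialect {
      def parse(sqlText: String): LogicalPlan =
        try delegate(sqlText)
        catch {
          case e: Exception => throw new DialectException(s"Cannot parse: $sqlText", e)
        }
    }
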
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/errors/package.scala

Lines changed: 2 additions & 0 deletions
@@ -38,6 +38,8 @@ package object errors {
     }
   }
 
+  class DialectException(msg: String, cause: Throwable) extends Exception(msg, cause)
+
   /**
    * Wraps any exceptions that are thrown while executing `f` in a
    * [[catalyst.errors.TreeNodeException TreeNodeException]], attaching the provided `tree`.
