
Commit 2ec848a

Merge branch 'master' of git://git.apache.org/spark into metrics-structure-improvement
2 parents 3ea7896 + 0a7091e

File tree: 33 files changed, +1187 −89 lines


README.md

Lines changed: 1 addition & 1 deletion
@@ -75,7 +75,7 @@ can be run using:
     ./dev/run-tests

 Please see the guidance on how to
-[run all automated tests](https://cwiki.apache.org/confluence/display/SPARK/Contributing+to+Spark#ContributingtoSpark-AutomatedTesting)
+[run all automated tests](https://cwiki.apache.org/confluence/display/SPARK/Contributing+to+Spark#ContributingtoSpark-AutomatedTesting).

 ## A Note About Hadoop Versions

core/src/main/scala/org/apache/spark/api/python/PythonRDD.scala

Lines changed: 1 addition & 1 deletion
@@ -776,7 +776,7 @@ private[spark] object PythonRDD extends Logging {
   }

   /**
-   * Convert and RDD of Java objects to and RDD of serialized Python objects, that is usable by
+   * Convert an RDD of Java objects to an RDD of serialized Python objects, that is usable by
    * PySpark.
    */
  def javaToPython(jRDD: JavaRDD[Any]): JavaRDD[Array[Byte]] = {

core/src/main/scala/org/apache/spark/network/ManagedBuffer.scala

Lines changed: 10 additions & 2 deletions
@@ -19,6 +19,7 @@ package org.apache.spark.network

 import java.io.{FileInputStream, RandomAccessFile, File, InputStream}
 import java.nio.ByteBuffer
+import java.nio.channels.FileChannel
 import java.nio.channels.FileChannel.MapMode

 import com.google.common.io.ByteStreams
@@ -66,8 +67,15 @@ final class FileSegmentManagedBuffer(val file: File, val offset: Long, val lengt
   override def size: Long = length

   override def nioByteBuffer(): ByteBuffer = {
-    val channel = new RandomAccessFile(file, "r").getChannel
-    channel.map(MapMode.READ_ONLY, offset, length)
+    var channel: FileChannel = null
+    try {
+      channel = new RandomAccessFile(file, "r").getChannel
+      channel.map(MapMode.READ_ONLY, offset, length)
+    } finally {
+      if (channel != null) {
+        channel.close()
+      }
+    }
   }

   override def inputStream(): InputStream = {
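The fix above ensures the memory-mapped read always releases its FileChannel, even when map() throws. A minimal standalone sketch of the same close-in-finally pattern, outside Spark and with hypothetical file/offset/length values:

import java.io.{File, RandomAccessFile}
import java.nio.ByteBuffer
import java.nio.channels.FileChannel
import java.nio.channels.FileChannel.MapMode

object MappedSegmentSketch {
  // Map [offset, offset + length) of a file read-only. Per the JDK docs the
  // mapping remains valid after the channel is closed, so closing it in
  // finally is safe and avoids leaking a file handle on failure.
  def mapSegment(file: File, offset: Long, length: Long): ByteBuffer = {
    var channel: FileChannel = null
    try {
      channel = new RandomAccessFile(file, "r").getChannel
      channel.map(MapMode.READ_ONLY, offset, length)
    } finally {
      if (channel != null) {
        channel.close()
      }
    }
  }

  def main(args: Array[String]): Unit = {
    // Hypothetical example: map the first 16 bytes of a local file.
    val buf = mapSegment(new File("/tmp/example.bin"), 0L, 16L)
    println(s"Mapped ${buf.remaining()} bytes")
  }
}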

core/src/main/scala/org/apache/spark/storage/ShuffleBlockFetcherIterator.scala

Lines changed: 1 addition & 1 deletion
@@ -23,7 +23,7 @@ import scala.collection.mutable.ArrayBuffer
 import scala.collection.mutable.HashSet
 import scala.collection.mutable.Queue

-import org.apache.spark.{TaskContext, Logging, SparkException}
+import org.apache.spark.{TaskContext, Logging}
 import org.apache.spark.network.{ManagedBuffer, BlockFetchingListener, BlockTransferService}
 import org.apache.spark.serializer.Serializer
 import org.apache.spark.util.Utils

core/src/test/scala/org/apache/spark/ui/UISuite.scala

Lines changed: 5 additions & 9 deletions
@@ -23,7 +23,6 @@ import javax.servlet.http.HttpServletRequest
 import scala.io.Source
 import scala.util.{Failure, Success, Try}

-import org.eclipse.jetty.server.Server
 import org.eclipse.jetty.servlet.ServletContextHandler
 import org.scalatest.FunSuite
 import org.scalatest.concurrent.Eventually._
@@ -108,14 +107,8 @@ class UISuite extends FunSuite {
   }

   test("jetty selects different port under contention") {
-    val startPort = 4040
-    val server = new Server(startPort)
-
-    Try { server.start() } match {
-      case Success(s) =>
-      case Failure(e) =>
-      // Either case server port is busy hence setup for test complete
-    }
+    val server = new ServerSocket(0)
+    val startPort = server.getLocalPort
     val serverInfo1 = JettyUtils.startJettyServer(
       "0.0.0.0", startPort, Seq[ServletContextHandler](), new SparkConf)
     val serverInfo2 = JettyUtils.startJettyServer(
@@ -126,6 +119,9 @@ class UISuite extends FunSuite {
     assert(boundPort1 != startPort)
     assert(boundPort2 != startPort)
     assert(boundPort1 != boundPort2)
+    serverInfo1.server.stop()
+    serverInfo2.server.stop()
+    server.close()
   }

   test("jetty binds to port 0 correctly") {

docs/_layouts/global.html

Lines changed: 2 additions & 1 deletion
@@ -111,6 +111,7 @@
                             <li class="divider"></li>
                             <li><a href="building-spark.html">Building Spark</a></li>
                             <li><a href="https://cwiki.apache.org/confluence/display/SPARK/Contributing+to+Spark">Contributing to Spark</a></li>
+                            <li><a href="https://cwiki.apache.org/confluence/display/SPARK/Supplemental+Spark+Projects">Supplemental Projects</a></li>
                         </ul>
                     </li>
                 </ul>
@@ -151,7 +152,7 @@ <h1 class="title">{{ page.title }}</h1>
             MathJax.Hub.Config({
                 tex2jax: {
                     inlineMath: [ ["$", "$"], ["\\\\(","\\\\)"] ],
-                    displayMath: [ ["$$","$$"], ["\\[", "\\]"] ], 
+                    displayMath: [ ["$$","$$"], ["\\[", "\\]"] ],
                     processEscapes: true,
                     skipTags: ['script', 'noscript', 'style', 'textarea', 'pre']
                 }

docs/ec2-scripts.md

Lines changed: 1 addition & 1 deletion
@@ -156,6 +156,6 @@ If you have a patch or suggestion for one of these limitations, feel free to

 # Accessing Data in S3

-Spark's file interface allows it to process data in Amazon S3 using the same URI formats that are supported for Hadoop. You can specify a path in S3 as input through a URI of the form `s3n://<bucket>/path`. You will also need to set your Amazon security credentials, either by setting the environment variables `AWS_ACCESS_KEY_ID` and `AWS_SECRET_ACCESS_KEY` before your program or through `SparkContext.hadoopConfiguration`. Full instructions on S3 access using the Hadoop input libraries can be found on the [Hadoop S3 page](http://wiki.apache.org/hadoop/AmazonS3).
+Spark's file interface allows it to process data in Amazon S3 using the same URI formats that are supported for Hadoop. You can specify a path in S3 as input through a URI of the form `s3n://<bucket>/path`. To provide AWS credentials for S3 access, launch the Spark cluster with the option `--copy-aws-credentials`. Full instructions on S3 access using the Hadoop input libraries can be found on the [Hadoop S3 page](http://wiki.apache.org/hadoop/AmazonS3).

 In addition to using a single input file, you can also use a directory of files as input by simply giving the path to the directory.
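For context, reading an S3 path from a Spark application looks like the sketch below. It assumes the cluster already has working AWS credentials (for example, a cluster launched with --copy-aws-credentials as described above); the bucket and key names are placeholders:

import org.apache.spark.{SparkConf, SparkContext}

object S3ReadSketch {
  def main(args: Array[String]): Unit = {
    val sc = new SparkContext(new SparkConf().setAppName("S3ReadSketch"))
    // Placeholder bucket/key; any s3n:// URI reachable with the cluster's
    // credentials works the same way.
    val lines = sc.textFile("s3n://my-bucket/logs/part-00000")
    println(s"Line count: ${lines.count()}")
    sc.stop()
  }
}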

docs/index.md

Lines changed: 1 addition & 0 deletions
@@ -107,6 +107,7 @@ options for deployment:
 * [OpenStack Swift](storage-openstack-swift.html)
 * [Building Spark](building-spark.html): build Spark using the Maven system
 * [Contributing to Spark](https://cwiki.apache.org/confluence/display/SPARK/Contributing+to+Spark)
+* [Supplemental Projects](https://cwiki.apache.org/confluence/display/SPARK/Supplemental+Spark+Projects): related third party Spark projects

 **External Resources:**

docs/sql-programming-guide.md

Lines changed: 40 additions & 18 deletions
@@ -128,7 +128,7 @@ feature parity with a HiveContext.

 </div>

-The specific variant of SQL that is used to parse queries can also be selected using the 
+The specific variant of SQL that is used to parse queries can also be selected using the
 `spark.sql.dialect` option. This parameter can be changed using either the `setConf` method on
 a SQLContext or by using a `SET key=value` command in SQL. For a SQLContext, the only dialect
 available is "sql" which uses a simple SQL parser provided by Spark SQL. In a HiveContext, the
@@ -139,7 +139,7 @@ default is "hiveql", though "sql" is also available. Since the HiveQL parser is

 Spark SQL supports operating on a variety of data sources through the `SchemaRDD` interface.
 A SchemaRDD can be operated on as normal RDDs and can also be registered as a temporary table.
-Registering a SchemaRDD as a table allows you to run SQL queries over its data. This section 
+Registering a SchemaRDD as a table allows you to run SQL queries over its data. This section
 describes the various methods for loading data into a SchemaRDD.

 ## RDDs
@@ -152,7 +152,7 @@ while writing your Spark application.
 The second method for creating SchemaRDDs is through a programmatic interface that allows you to
 construct a schema and then apply it to an existing RDD. While this method is more verbose, it allows
 you to construct SchemaRDDs when the columns and their types are not known until runtime.
- 
+
 ### Inferring the Schema Using Reflection
 <div class="codetabs">

@@ -193,7 +193,7 @@ teenagers.map(t => "Name: " + t(0)).collect().foreach(println)
 <div data-lang="java" markdown="1">

 Spark SQL supports automatically converting an RDD of [JavaBeans](http://stackoverflow.com/questions/3295496/what-is-a-javabean-exactly)
-into a Schema RDD. The BeanInfo, obtained using reflection, defines the schema of the table. 
+into a Schema RDD. The BeanInfo, obtained using reflection, defines the schema of the table.
 Currently, Spark SQL does not support JavaBeans that contain
 nested or contain complex types such as Lists or Arrays. You can create a JavaBean by creating a
 class that implements Serializable and has getters and setters for all of its fields.
@@ -480,7 +480,7 @@ for name in names.collect():

 [Parquet](http://parquet.io) is a columnar format that is supported by many other data processing systems.
 Spark SQL provides support for both reading and writing Parquet files that automatically preserves the schema
-of the original data. 
+of the original data.

 ### Loading Data Programmatically

@@ -562,7 +562,7 @@ for teenName in teenNames.collect():

 </div>

-</div> 
+</div>

 ### Configuration

@@ -808,7 +808,7 @@ memory usage and GC pressure. You can call `uncacheTable("tableName")` to remove
 Note that if you call `cache` rather than `cacheTable`, tables will _not_ be cached using
 the in-memory columnar format, and therefore `cacheTable` is strongly recommended for this use case.

-Configuration of in-memory caching can be done using the `setConf` method on SQLContext or by running 
+Configuration of in-memory caching can be done using the `setConf` method on SQLContext or by running
 `SET key=value` commands using SQL.

 <table class="table">
@@ -881,10 +881,32 @@ To start the JDBC server, run the following in the Spark directory:

     ./sbin/start-thriftserver.sh

-The default port the server listens on is 10000. To listen on customized host and port, please set
-the `HIVE_SERVER2_THRIFT_PORT` and `HIVE_SERVER2_THRIFT_BIND_HOST` environment variables. You may
-run `./sbin/start-thriftserver.sh --help` for a complete list of all available options. Now you can
-use beeline to test the Thrift JDBC server:
+This script accepts all `bin/spark-submit` command line options, plus a `--hiveconf` option to
+specify Hive properties. You may run `./sbin/start-thriftserver.sh --help` for a complete list of
+all available options. By default, the server listens on localhost:10000. You may override this
+bahaviour via either environment variables, i.e.:
+
+{% highlight bash %}
+export HIVE_SERVER2_THRIFT_PORT=<listening-port>
+export HIVE_SERVER2_THRIFT_BIND_HOST=<listening-host>
+./sbin/start-thriftserver.sh \
+  --master <master-uri> \
+  ...
+```
+{% endhighlight %}
+
+or system properties:
+
+{% highlight bash %}
+./sbin/start-thriftserver.sh \
+  --hiveconf hive.server2.thrift.port=<listening-port> \
+  --hiveconf hive.server2.thrift.bind.host=<listening-host> \
+  --master <master-uri>
+  ...
+```
+{% endhighlight %}
+
+Now you can use beeline to test the Thrift JDBC server:

     ./bin/beeline

@@ -930,7 +952,7 @@ SQL deprecates this property in favor of `spark.sql.shuffle.partitions`, whose d
 is 200. Users may customize this property via `SET`:

     SET spark.sql.shuffle.partitions=10;
-    SELECT page, count(*) c 
+    SELECT page, count(*) c
     FROM logs_last_month_cached
     GROUP BY page ORDER BY c DESC LIMIT 10;

@@ -1139,7 +1161,7 @@ evaluated by the SQL execution engine. A full list of the functions supported c
 <div data-lang="scala" markdown="1">

 All data types of Spark SQL are located in the package `org.apache.spark.sql`.
-You can access them by doing 
+You can access them by doing
 {% highlight scala %}
 import org.apache.spark.sql._
 {% endhighlight %}
@@ -1245,7 +1267,7 @@ import org.apache.spark.sql._
 <tr>
   <td> <b>StructType</b> </td>
   <td> org.apache.spark.sql.Row </td>
-  <td> 
+  <td>
   StructType(<i>fields</i>)<br />
   <b>Note:</b> <i>fields</i> is a Seq of StructFields. Also, two fields with the same
   name are not allowed.
@@ -1267,7 +1289,7 @@ import org.apache.spark.sql._

 All data types of Spark SQL are located in the package of
 `org.apache.spark.sql.api.java`. To access or create a data type,
-please use factory methods provided in 
+please use factory methods provided in
 `org.apache.spark.sql.api.java.DataType`.

 <table class="table">
@@ -1373,7 +1395,7 @@ please use factory methods provided in
 <tr>
   <td> <b>StructType</b> </td>
   <td> org.apache.spark.sql.api.java </td>
-  <td> 
+  <td>
   DataType.createStructType(<i>fields</i>)<br />
   <b>Note:</b> <i>fields</i> is a List or an array of StructFields.
   Also, two fields with the same name are not allowed.
@@ -1394,7 +1416,7 @@ please use factory methods provided in
 <div data-lang="python" markdown="1">

 All data types of Spark SQL are located in the package of `pyspark.sql`.
-You can access them by doing 
+You can access them by doing
 {% highlight python %}
 from pyspark.sql import *
 {% endhighlight %}
@@ -1518,7 +1540,7 @@ from pyspark.sql import *
 <tr>
   <td> <b>StructType</b> </td>
   <td> list or tuple </td>
-  <td> 
+  <td>
   StructType(<i>fields</i>)<br />
   <b>Note:</b> <i>fields</i> is a Seq of StructFields. Also, two fields with the same
   name are not allowed.
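Several of the passages touched above (dialect selection, in-memory caching, shuffle partitions) configure Spark SQL either through setConf on a SQLContext or through a SQL SET statement. A minimal sketch of both routes against a Spark 1.x-era SQLContext, reusing the spark.sql.shuffle.partitions property from the guide:

import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.sql.SQLContext

object SqlConfSketch {
  def main(args: Array[String]): Unit = {
    val sc = new SparkContext(new SparkConf().setAppName("SqlConfSketch"))
    val sqlContext = new SQLContext(sc)

    // Route 1: programmatic configuration on the SQLContext.
    sqlContext.setConf("spark.sql.shuffle.partitions", "10")

    // Route 2: the same property set through a SQL statement.
    sqlContext.sql("SET spark.sql.shuffle.partitions=10")

    sc.stop()
  }
}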

ec2/deploy.generic/root/spark-ec2/ec2-variables.sh

Lines changed: 2 additions & 0 deletions
@@ -30,3 +30,5 @@ export HADOOP_MAJOR_VERSION="{{hadoop_major_version}}"
 export SWAP_MB="{{swap}}"
 export SPARK_WORKER_INSTANCES="{{spark_worker_instances}}"
 export SPARK_MASTER_OPTS="{{spark_master_opts}}"
+export AWS_ACCESS_KEY_ID="{{aws_access_key_id}}"
+export AWS_SECRET_ACCESS_KEY="{{aws_secret_access_key}}"
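These templated exports make the launching user's AWS keys visible on the cluster nodes as environment variables. One way an application could forward them to Hadoop's s3n filesystem settings is sketched below; the property names are the standard Hadoop s3n keys, and whether this step is needed at all depends on how the cluster propagates credentials, so treat it as an illustrative assumption rather than required setup:

import org.apache.spark.{SparkConf, SparkContext}

object ForwardAwsCredentialsSketch {
  def main(args: Array[String]): Unit = {
    val sc = new SparkContext(new SparkConf().setAppName("ForwardAwsCredentials"))

    // Copy the env vars exported in ec2-variables.sh into the Hadoop
    // configuration used for s3n:// paths, if they are present.
    sys.env.get("AWS_ACCESS_KEY_ID").foreach { id =>
      sc.hadoopConfiguration.set("fs.s3n.awsAccessKeyId", id)
    }
    sys.env.get("AWS_SECRET_ACCESS_KEY").foreach { secret =>
      sc.hadoopConfiguration.set("fs.s3n.awsSecretAccessKey", secret)
    }

    sc.stop()
  }
}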

0 commit comments
