Commit b4dc980

Andrew Or committed:
Merge branch 'master' of github.com:apache/spark into rest-tests

2 parents: b55e40f + 99bd500

File tree: 22 files changed, +96 -286 lines changed


.rat-excludes

Lines changed: 1 addition & 0 deletions
@@ -1,4 +1,5 @@
 target
+cache
 .gitignore
 .gitattributes
 .project

assembly/pom.xml

Lines changed: 0 additions & 129 deletions
@@ -36,10 +36,6 @@
     <spark.jar.dir>scala-${scala.binary.version}</spark.jar.dir>
     <spark.jar.basename>spark-assembly-${project.version}-hadoop${hadoop.version}.jar</spark.jar.basename>
     <spark.jar>${project.build.directory}/${spark.jar.dir}/${spark.jar.basename}</spark.jar>
-    <deb.pkg.name>spark</deb.pkg.name>
-    <deb.install.path>/usr/share/spark</deb.install.path>
-    <deb.user>root</deb.user>
-    <deb.bin.filemode>755</deb.bin.filemode>
   </properties>
 
   <dependencies>
@@ -217,131 +213,6 @@
       </plugins>
     </build>
   </profile>
-  <profile>
-    <id>deb</id>
-    <build>
-      <plugins>
-        <plugin>
-          <artifactId>maven-antrun-plugin</artifactId>
-          <executions>
-            <execution>
-              <phase>prepare-package</phase>
-              <goals>
-                <goal>run</goal>
-              </goals>
-              <configuration>
-                <target>
-                  <echo>
-                    NOTE: Debian packaging is deprecated and is scheduled to be removed in Spark 1.4.
-                  </echo>
-                </target>
-              </configuration>
-            </execution>
-          </executions>
-        </plugin>
-        <plugin>
-          <groupId>org.codehaus.mojo</groupId>
-          <artifactId>buildnumber-maven-plugin</artifactId>
-          <version>1.2</version>
-          <executions>
-            <execution>
-              <phase>validate</phase>
-              <goals>
-                <goal>create</goal>
-              </goals>
-              <configuration>
-                <shortRevisionLength>8</shortRevisionLength>
-              </configuration>
-            </execution>
-          </executions>
-        </plugin>
-        <plugin>
-          <groupId>org.vafer</groupId>
-          <artifactId>jdeb</artifactId>
-          <version>0.11</version>
-          <executions>
-            <execution>
-              <phase>package</phase>
-              <goals>
-                <goal>jdeb</goal>
-              </goals>
-              <configuration>
-                <deb>${project.build.directory}/${deb.pkg.name}_${project.version}-${buildNumber}_all.deb</deb>
-                <attach>false</attach>
-                <compression>gzip</compression>
-                <dataSet>
-                  <data>
-                    <src>${spark.jar}</src>
-                    <type>file</type>
-                    <mapper>
-                      <type>perm</type>
-                      <user>${deb.user}</user>
-                      <group>${deb.user}</group>
-                      <prefix>${deb.install.path}/jars</prefix>
-                    </mapper>
-                  </data>
-                  <data>
-                    <src>${basedir}/src/deb/RELEASE</src>
-                    <type>file</type>
-                    <mapper>
-                      <type>perm</type>
-                      <user>${deb.user}</user>
-                      <group>${deb.user}</group>
-                      <prefix>${deb.install.path}</prefix>
-                    </mapper>
-                  </data>
-                  <data>
-                    <src>${basedir}/../conf</src>
-                    <type>directory</type>
-                    <mapper>
-                      <type>perm</type>
-                      <user>${deb.user}</user>
-                      <group>${deb.user}</group>
-                      <prefix>${deb.install.path}/conf</prefix>
-                      <filemode>${deb.bin.filemode}</filemode>
-                    </mapper>
-                  </data>
-                  <data>
-                    <src>${basedir}/../bin</src>
-                    <type>directory</type>
-                    <mapper>
-                      <type>perm</type>
-                      <user>${deb.user}</user>
-                      <group>${deb.user}</group>
-                      <prefix>${deb.install.path}/bin</prefix>
-                      <filemode>${deb.bin.filemode}</filemode>
-                    </mapper>
-                  </data>
-                  <data>
-                    <src>${basedir}/../sbin</src>
-                    <type>directory</type>
-                    <mapper>
-                      <type>perm</type>
-                      <user>${deb.user}</user>
-                      <group>${deb.user}</group>
-                      <prefix>${deb.install.path}/sbin</prefix>
-                      <filemode>${deb.bin.filemode}</filemode>
-                    </mapper>
-                  </data>
-                  <data>
-                    <src>${basedir}/../python</src>
-                    <type>directory</type>
-                    <mapper>
-                      <type>perm</type>
-                      <user>${deb.user}</user>
-                      <group>${deb.user}</group>
-                      <prefix>${deb.install.path}/python</prefix>
-                      <filemode>${deb.bin.filemode}</filemode>
-                    </mapper>
-                  </data>
-                </dataSet>
-              </configuration>
-            </execution>
-          </executions>
-        </plugin>
-      </plugins>
-    </build>
-  </profile>
   <profile>
     <id>kinesis-asl</id>
     <dependencies>

assembly/src/deb/RELEASE

Lines changed: 0 additions & 2 deletions
This file was deleted.

assembly/src/deb/control/control

Lines changed: 0 additions & 8 deletions
This file was deleted.

core/src/main/scala/org/apache/spark/util/Utils.scala

Lines changed: 3 additions & 8 deletions
@@ -283,13 +283,6 @@ private[spark] object Utils extends Logging {
         dir = new File(root, namePrefix + "-" + UUID.randomUUID.toString)
         if (dir.exists() || !dir.mkdirs()) {
           dir = null
-        } else {
-          // Restrict file permissions via chmod if available.
-          // For Windows this step is ignored.
-          if (!isWindows && !chmod700(dir)) {
-            dir.delete()
-            dir = null
-          }
         }
       } catch { case e: SecurityException => dir = null; }
     }
@@ -703,7 +696,9 @@
       try {
        val rootDir = new File(root)
        if (rootDir.exists || rootDir.mkdirs()) {
-        Some(createDirectory(root).getAbsolutePath())
+        val dir = createDirectory(root)
+        chmod700(dir)
+        Some(dir.getAbsolutePath)
        } else {
          logError(s"Failed to create dir in $root. Ignoring this directory.")
          None
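
The net effect of this Utils.scala change is that directory creation and permission tightening are now two separate steps: createDirectory only creates the directory, and the caller applies chmod700 afterwards. For reference, a minimal standalone sketch of an owner-only permission helper built on java.io.File is shown below; the object name and main method are illustrative, not the actual Spark code.

    import java.io.File

    object PermissionSketch {
      // Restrict a directory to owner-only read/write/execute (chmod 700 style).
      // Each setter is called twice: first clear the bit for everyone, then
      // re-enable it for the owner only. Returns true only if every call succeeded.
      def chmod700(file: File): Boolean = {
        file.setReadable(false, false) && file.setReadable(true, true) &&
        file.setWritable(false, false) && file.setWritable(true, true) &&
        file.setExecutable(false, false) && file.setExecutable(true, true)
      }

      def main(args: Array[String]): Unit = {
        val dir = new File(System.getProperty("java.io.tmpdir"), "perm-sketch")
        if (dir.exists() || dir.mkdirs()) {
          println(s"chmod700 applied: ${chmod700(dir)}")
        }
      }
    }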

docs/building-spark.md

Lines changed: 0 additions & 10 deletions
@@ -159,16 +159,6 @@ Thus, the full flow for running continuous-compilation of the `core` submodule m
 For help in setting up IntelliJ IDEA or Eclipse for Spark development, and troubleshooting, refer to the
 [wiki page for IDE setup](https://cwiki.apache.org/confluence/display/SPARK/Contributing+to+Spark#ContributingtoSpark-IDESetup).
 
-# Building Spark Debian Packages
-
-_NOTE: Debian packaging is deprecated and is scheduled to be removed in Spark 1.4._
-
-The Maven build includes support for building a Debian package containing the assembly 'fat-jar', PySpark, and the necessary scripts and configuration files. This can be created by specifying the following:
-
-    mvn -Pdeb -DskipTests clean package
-
-The debian package can then be found under assembly/target. We added the short commit hash to the file name so that we can distinguish individual packages built for SNAPSHOT versions.
-
 # Running Java 8 Test Suites
 
 Running only Java 8 tests and nothing else.

mllib/src/main/scala/org/apache/spark/mllib/classification/ClassificationModel.scala

Lines changed: 5 additions & 11 deletions
@@ -17,12 +17,12 @@
 
 package org.apache.spark.mllib.classification
 
+import org.json4s.{DefaultFormats, JValue}
+
 import org.apache.spark.annotation.Experimental
 import org.apache.spark.api.java.JavaRDD
 import org.apache.spark.mllib.linalg.Vector
-import org.apache.spark.mllib.util.Loader
 import org.apache.spark.rdd.RDD
-import org.apache.spark.sql.{DataFrame, Row}
 
 /**
  * :: Experimental ::
@@ -60,16 +60,10 @@ private[mllib] object ClassificationModel {
 
   /**
    * Helper method for loading GLM classification model metadata.
-   *
-   * @param modelClass String name for model class (used for error messages)
    * @return (numFeatures, numClasses)
    */
-  def getNumFeaturesClasses(metadata: DataFrame, modelClass: String, path: String): (Int, Int) = {
-    metadata.select("numFeatures", "numClasses").take(1)(0) match {
-      case Row(nFeatures: Int, nClasses: Int) => (nFeatures, nClasses)
-      case _ => throw new Exception(s"$modelClass unable to load" +
-        s" numFeatures, numClasses from metadata: ${Loader.metadataPath(path)}")
-    }
+  def getNumFeaturesClasses(metadata: JValue): (Int, Int) = {
+    implicit val formats = DefaultFormats
+    ((metadata \ "numFeatures").extract[Int], (metadata \ "numClasses").extract[Int])
   }
-
 }
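
The rewritten helper pulls the two counts out of a parsed json4s AST instead of a DataFrame row. A small self-contained sketch of that extraction pattern, assuming json4s-jackson is on the classpath and using a made-up metadata string, looks like this:

    import org.json4s.{DefaultFormats, JValue}
    import org.json4s.jackson.JsonMethods.parse

    object MetadataExtractSketch {
      def main(args: Array[String]): Unit = {
        implicit val formats = DefaultFormats
        // Hypothetical metadata of the shape a GLM classification model save would write.
        val metadata: JValue = parse(
          """{"class":"ExampleModel","version":"1.0","numFeatures":692,"numClasses":2}""")
        val numFeatures = (metadata \ "numFeatures").extract[Int]
        val numClasses = (metadata \ "numClasses").extract[Int]
        println(s"numFeatures=$numFeatures, numClasses=$numClasses")
      }
    }

Because the metadata is plain JSON text, a loader can inspect the class and version fields first and only then decide how to interpret the remaining fields.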

mllib/src/main/scala/org/apache/spark/mllib/classification/LogisticRegression.scala

Lines changed: 1 addition & 2 deletions
@@ -173,8 +173,7 @@ object LogisticRegressionModel extends Loader[LogisticRegressionModel] {
     val classNameV1_0 = "org.apache.spark.mllib.classification.LogisticRegressionModel"
     (loadedClassName, version) match {
       case (className, "1.0") if className == classNameV1_0 =>
-        val (numFeatures, numClasses) =
-          ClassificationModel.getNumFeaturesClasses(metadata, classNameV1_0, path)
+        val (numFeatures, numClasses) = ClassificationModel.getNumFeaturesClasses(metadata)
         val data = GLMClassificationModel.SaveLoadV1_0.loadData(sc, path, classNameV1_0)
         // numFeatures, numClasses, weights are checked in model initialization
         val model =

mllib/src/main/scala/org/apache/spark/mllib/classification/NaiveBayes.scala

Lines changed: 9 additions & 9 deletions
@@ -18,15 +18,16 @@
 package org.apache.spark.mllib.classification
 
 import breeze.linalg.{DenseMatrix => BDM, DenseVector => BDV, argmax => brzArgmax, sum => brzSum}
+import org.json4s.JsonDSL._
+import org.json4s.jackson.JsonMethods._
 
-import org.apache.spark.{SparkContext, SparkException, Logging}
+import org.apache.spark.{Logging, SparkContext, SparkException}
 import org.apache.spark.mllib.linalg.{DenseVector, SparseVector, Vector}
 import org.apache.spark.mllib.regression.LabeledPoint
 import org.apache.spark.mllib.util.{Loader, Saveable}
 import org.apache.spark.rdd.RDD
 import org.apache.spark.sql.{DataFrame, SQLContext}
 
-
 /**
  * Model for Naive Bayes Classifiers.
  *
@@ -78,7 +79,7 @@ class NaiveBayesModel private[mllib] (
 
 object NaiveBayesModel extends Loader[NaiveBayesModel] {
 
-  import Loader._
+  import org.apache.spark.mllib.util.Loader._
 
   private object SaveLoadV1_0 {
 
@@ -95,10 +96,10 @@ object NaiveBayesModel extends Loader[NaiveBayesModel] {
       import sqlContext.implicits._
 
       // Create JSON metadata.
-      val metadataRDD =
-        sc.parallelize(Seq((thisClassName, thisFormatVersion, data.theta(0).size, data.pi.size)), 1)
-          .toDataFrame("class", "version", "numFeatures", "numClasses")
-      metadataRDD.toJSON.saveAsTextFile(metadataPath(path))
+      val metadata = compact(render(
+        ("class" -> thisClassName) ~ ("version" -> thisFormatVersion) ~
+          ("numFeatures" -> data.theta(0).length) ~ ("numClasses" -> data.pi.length)))
+      sc.parallelize(Seq(metadata), 1).saveAsTextFile(metadataPath(path))
 
       // Create Parquet data.
       val dataRDD: DataFrame = sc.parallelize(Seq(data), 1)
@@ -126,8 +127,7 @@ object NaiveBayesModel extends Loader[NaiveBayesModel] {
       val classNameV1_0 = SaveLoadV1_0.thisClassName
       (loadedClassName, version) match {
         case (className, "1.0") if className == classNameV1_0 =>
-          val (numFeatures, numClasses) =
-            ClassificationModel.getNumFeaturesClasses(metadata, classNameV1_0, path)
+          val (numFeatures, numClasses) = ClassificationModel.getNumFeaturesClasses(metadata)
           val model = SaveLoadV1_0.load(sc, path)
           assert(model.pi.size == numClasses,
             s"NaiveBayesModel.load expected $numClasses classes," +

mllib/src/main/scala/org/apache/spark/mllib/classification/SVM.scala

Lines changed: 2 additions & 4 deletions
@@ -23,10 +23,9 @@ import org.apache.spark.mllib.classification.impl.GLMClassificationModel
 import org.apache.spark.mllib.linalg.Vector
 import org.apache.spark.mllib.optimization._
 import org.apache.spark.mllib.regression._
-import org.apache.spark.mllib.util.{DataValidators, Saveable, Loader}
+import org.apache.spark.mllib.util.{DataValidators, Loader, Saveable}
 import org.apache.spark.rdd.RDD
 
-
 /**
  * Model for Support Vector Machines (SVMs).
  *
@@ -97,8 +96,7 @@ object SVMModel extends Loader[SVMModel] {
     val classNameV1_0 = "org.apache.spark.mllib.classification.SVMModel"
     (loadedClassName, version) match {
       case (className, "1.0") if className == classNameV1_0 =>
-        val (numFeatures, numClasses) =
-          ClassificationModel.getNumFeaturesClasses(metadata, classNameV1_0, path)
+        val (numFeatures, numClasses) = ClassificationModel.getNumFeaturesClasses(metadata)
         val data = GLMClassificationModel.SaveLoadV1_0.loadData(sc, path, classNameV1_0)
         val model = new SVMModel(data.weights, data.intercept)
         assert(model.weights.size == numFeatures, s"SVMModel.load with numFeatures=$numFeatures" +
