import os
import re
import shutil
import subprocess
import sys
import time
import urllib2
# Fill in release details here:
RELEASE_URL = "http://people.apache.org/~pwendell/spark-1.0.0-rc1/"
RELEASE_KEY = "9E4FE3AF"
RELEASE_REPOSITORY = "https://repository.apache.org/content/repositories/orgapachespark-1006/"
RELEASE_VERSION = "1.0.0"
SCALA_VERSION = "2.10.4"
SCALA_BINARY_VERSION = "2.10"
#
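# RELEASE_URL hosts the candidate's binary .tgz artifacts, RELEASE_REPOSITORY is the staged
# Maven repository the test builds resolve against, and RELEASE_KEY is the GPG key ID the
# artifacts should be signed with.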
LOG_FILE_NAME = "spark_audit_%s" % time.strftime("%h_%m_%Y_%I_%M_%S")
LOG_FILE = open(LOG_FILE_NAME, 'w')
WORK_DIR = "/tmp/audit_%s" % int(time.time())
MAVEN_CMD = "mvn"
GPG_CMD = "gpg"
# Track failures
failures = []
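# Helper functions. Output from shelled-out commands is captured in LOG_FILE rather than
# printed to the console; test()/passed()/failed() print one summary line per check and
# record failures for the report at the end of the script.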
def clean_work_files():
    print "OK to delete scratch directory '%s'? (y/N): " % WORK_DIR
    response = raw_input()
    if response == "y":
        shutil.rmtree(WORK_DIR)
    print "Should I delete the log output file '%s'? (y/N): " % LOG_FILE_NAME
    response = raw_input()
    if response == "y":
        os.unlink(LOG_FILE_NAME)


def run_cmd(cmd, exit_on_failure=True):
    print >> LOG_FILE, "Running command: %s" % cmd
    ret = subprocess.call(cmd, shell=True, stdout=LOG_FILE, stderr=LOG_FILE)
    if ret != 0 and exit_on_failure:
        print "Command failed: %s" % cmd
        clean_work_files()
        sys.exit(-1)
    return ret


def run_cmd_with_output(cmd):
    print >> sys.stderr, "Running command: %s" % cmd
    return subprocess.check_output(cmd, shell=True, stderr=LOG_FILE)
def test(bool, str):
    if bool:
        return passed(str)
    failed(str)


def passed(str):
    print "[PASSED] %s" % str


def failed(str):
    failures.append(str)
    print "[**FAILED**] %s" % str


def get_url(url):
    return urllib2.urlopen(url).read()
original_dir = os.getcwd()

# For each of these modules, we'll test an 'empty' application in sbt and
# maven that links against them. This will catch issues with messed up
# dependencies within those projects.
modules = [
    "spark-core", "spark-bagel", "spark-mllib", "spark-streaming", "spark-repl",
    "spark-graphx", "spark-streaming-flume", "spark-streaming-kafka",
    "spark-streaming-mqtt", "spark-streaming-twitter", "spark-streaming-zeromq",
    "spark-catalyst", "spark-sql", "spark-hive"
]
modules = map(lambda m: "%s_%s" % (m, SCALA_BINARY_VERSION), modules)
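# Published Scala artifacts carry the binary Scala version as a suffix, so e.g.
# "spark-core" becomes "spark-core_2.10" here.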
# ... (elided here: the original script defines local_ivy_spark, the local ivy path that
# is checked alongside the caches below) ...
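# These local ivy/maven caches must not exist, so that the test builds below resolve the
# staged artifacts from RELEASE_REPOSITORY instead of stale local copies.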
cache_ivy_spark = "~/.ivy2/cache/org.apache.spark"
local_maven_kafka = "~/.m2/repository/org/apache/kafka"
local_maven_spark = "~/.m2/repository/org/apache/spark"


def ensure_path_not_present(x):
    if os.path.exists(os.path.expanduser(x)):
        print "Please remove %s, it can interfere with testing published artifacts." % x
        sys.exit(-1)

map(ensure_path_not_present, [local_ivy_spark, cache_ivy_spark, local_maven_kafka, local_maven_spark])
# SBT build tests
os.chdir("blank_sbt_build")
os.environ["SPARK_VERSION"] = RELEASE_VERSION
os.environ["SCALA_VERSION"] = SCALA_VERSION
os.environ["SPARK_RELEASE_REPOSITORY"] = RELEASE_REPOSITORY
os.environ["SPARK_AUDIT_MASTER"] = "local"
for module in modules:
    os.environ["SPARK_MODULE"] = module
    ret = run_cmd("sbt clean update", exit_on_failure=False)
    test(ret == 0, "sbt build against '%s' module" % module)
os.chdir(original_dir)
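# The SPARK_* and SCALA_* environment variables set above are presumably read by the
# build definition in blank_sbt_build, so each pass of the loop resolves exactly one
# Spark module from the staging repository.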
# SBT application tests
for app in ["sbt_app_core", "sbt_app_graphx", "sbt_app_streaming", "sbt_app_sql", "sbt_app_hive"]:
    os.chdir(app)
    ret = run_cmd("sbt clean run", exit_on_failure=False)
    test(ret == 0, "sbt application (%s)" % app)
    os.chdir(original_dir)
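# Each sbt_app_* directory is presumably a small standalone application that links against
# one slice of the release (core, graphx, streaming, sql, hive) and is expected to run to
# completion with "sbt clean run".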
# Maven build tests
os.chdir("blank_maven_build")
for module in modules:
    cmd = ('%s --update-snapshots -Dspark.release.repository="%s" -Dspark.version="%s" '
           '-Dspark.module="%s" clean compile' %
           (MAVEN_CMD, RELEASE_REPOSITORY, RELEASE_VERSION, module))
    ret = run_cmd(cmd, exit_on_failure=False)
    test(ret == 0, "maven build against '%s' module" % module)
os.chdir(original_dir)
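# As with the sbt checks, the -Dspark.release.repository / -Dspark.version / -Dspark.module
# properties are presumably consumed by blank_maven_build's pom.xml so that a minimal
# project compiles against each published module.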
os.chdir("maven_app_core")
mvn_exec_cmd = ('%s --update-snapshots -Dspark.release.repository="%s" -Dspark.version="%s" '
                '-Dscala.binary.version="%s" clean compile '
                'exec:java -Dexec.mainClass="SimpleApp"' %
                (MAVEN_CMD, RELEASE_REPOSITORY, RELEASE_VERSION, SCALA_BINARY_VERSION))
ret = run_cmd(mvn_exec_cmd, exit_on_failure=False)
test(ret == 0, "maven application (core)")
os.chdir(original_dir)
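# The maven_app_core step above goes beyond compiling: exec:java (the exec-maven-plugin)
# actually runs the SimpleApp main class against the staged release.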
# Binary artifact tests
if os.path.exists(WORK_DIR):
    print "Working directory '%s' already exists" % WORK_DIR
    sys.exit(-1)
os.mkdir(WORK_DIR)
os.chdir(WORK_DIR)
# ... (elided here: the original script fetches the release index page at RELEASE_URL and
# compiles the regex `r` used below to pull out the .tgz artifact names) ...
artifacts = r.findall(index_page)

for artifact in artifacts:
    print "==== Verifying download integrity for artifact: %s ====" % artifact

    artifact_url = "%s/%s" % (RELEASE_URL, artifact)
    run_cmd("wget %s" % artifact_url)

    key_file = "%s.asc" % artifact
    run_cmd("wget %s/%s" % (RELEASE_URL, key_file))

    run_cmd("wget %s%s" % (artifact_url, ".sha"))

    # Verify signature
    run_cmd("%s --keyserver pgp.mit.edu --recv-key %s" % (GPG_CMD, RELEASE_KEY))
    run_cmd("%s %s" % (GPG_CMD, key_file))
    passed("Artifact signature verified.")

    # Verify md5
    my_md5 = run_cmd_with_output("%s --print-md MD5 %s" % (GPG_CMD, artifact)).strip()
    release_md5 = get_url("%s.md5" % artifact_url).strip()
    test(my_md5 == release_md5, "Artifact MD5 verified.")

    # Verify sha
    my_sha = run_cmd_with_output("%s --print-md SHA512 %s" % (GPG_CMD, artifact)).strip()
    release_sha = get_url("%s.sha" % artifact_url).strip()
    test(my_sha == release_sha, "Artifact SHA verified.")

    # Verify Apache required files
    dir_name = artifact.replace(".tgz", "")
    run_cmd("tar xvzf %s" % artifact)
    base_files = os.listdir(dir_name)
    test("CHANGES.txt" in base_files, "Tarball contains CHANGES.txt file")
    test("NOTICE" in base_files, "Tarball contains NOTICE file")
    test("LICENSE" in base_files, "Tarball contains LICENSE file")

    os.chdir(WORK_DIR)
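# In the loop above, "gpg <artifact>.asc" verifies the detached signature against the
# downloaded artifact, and "gpg --print-md" recomputes the MD5/SHA512 digests locally for
# comparison with the published .md5/.sha files.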
for artifact in artifacts:
    print "==== Verifying build and tests for artifact: %s ====" % artifact
    # Re-derive the extracted directory for this artifact (dir_name left over from the
    # loop above would otherwise always point at the last artifact extracted).
    dir_name = artifact.replace(".tgz", "")
    os.chdir(os.path.join(WORK_DIR, dir_name))

    os.environ["MAVEN_OPTS"] = "-Xmx3g -XX:MaxPermSize=1g -XX:ReservedCodeCacheSize=1g"
    # Verify build
    print "==> Running build"
    run_cmd("sbt assembly")
    passed("sbt build successful")
    run_cmd("%s package -DskipTests" % MAVEN_CMD)
    passed("Maven build successful")

    # Verify tests
    print "==> Performing unit tests"
    run_cmd("%s test" % MAVEN_CMD)
    passed("Tests successful")
    os.chdir(WORK_DIR)
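# MAVEN_OPTS above raises the JVM heap, PermGen, and code-cache limits for the Maven
# package and test runs of the extracted release.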
clean_work_files()

if len(failures) == 0:
    print "ALL TESTS PASSED"
else:
    print "SOME TESTS DID NOT PASS"
    for f in failures:
        print f

os.chdir(original_dir)