From 6544b7eb493a5b150692ef01325752145845267e Mon Sep 17 00:00:00 2001 From: Nicholas Chammas Date: Tue, 10 Jun 2014 19:14:32 -0400 Subject: [PATCH 1/5] [SPARK-2065] give launched instances names MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This update gives launched EC2 instances descriptive names by using instance tags. Launched instances now show up in the EC2 console with these names. I used `format()` with named parameters, which I believe is the recommended practice for string formatting in Python, but which doesn’t seem to be used elsewhere in the script. --- ec2/spark_ec2.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/ec2/spark_ec2.py b/ec2/spark_ec2.py index 9d5748ba4bc23..4a12bdd2c9a81 100755 --- a/ec2/spark_ec2.py +++ b/ec2/spark_ec2.py @@ -418,6 +418,12 @@ def launch_cluster(conn, opts, cluster_name): master_nodes = master_res.instances print "Launched master in %s, regid = %s" % (zone, master_res.id) + # Give the instances descriptive names + for master in master_nodes: + master.add_tag(key='Name', value='spark-{cn}-master-{iid}'.format(cn=cluster_name, iid=master.id)) + for slave in slave_nodes: + slave.add_tag(key='Name', value='spark-{cn}-slave-{iid}'.format(cn=cluster_name, iid=slave.id)) + # Return all the instances return (master_nodes, slave_nodes) From 2627247506bb3b5c3d7509081825a0d6c718895e Mon Sep 17 00:00:00 2001 From: Nicholas Chammas Date: Tue, 10 Jun 2014 22:25:28 -0400 Subject: [PATCH 2/5] broke up lines before they hit 100 chars --- ec2/spark_ec2.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/ec2/spark_ec2.py b/ec2/spark_ec2.py index 4a12bdd2c9a81..6ce531e5054d8 100755 --- a/ec2/spark_ec2.py +++ b/ec2/spark_ec2.py @@ -420,9 +420,13 @@ def launch_cluster(conn, opts, cluster_name): # Give the instances descriptive names for master in master_nodes: - master.add_tag(key='Name', value='spark-{cn}-master-{iid}'.format(cn=cluster_name, iid=master.id)) + 
master.add_tag( + key='Name', + value='spark-{cn}-master-{iid}'.format(cn=cluster_name, iid=master.id)) for slave in slave_nodes: - slave.add_tag(key='Name', value='spark-{cn}-slave-{iid}'.format(cn=cluster_name, iid=slave.id)) + slave.add_tag( + key='Name', + value='spark-{cn}-slave-{iid}'.format(cn=cluster_name, iid=slave.id)) # Return all the instances return (master_nodes, slave_nodes) From 69f6e222ad4c02ea0fb117d6a70a720ef2d4fa59 Mon Sep 17 00:00:00 2001 From: Nicholas Chammas Date: Tue, 10 Jun 2014 23:03:03 -0400 Subject: [PATCH 3/5] PEP8 fixes --- ec2/spark_ec2.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/ec2/spark_ec2.py b/ec2/spark_ec2.py index 6ce531e5054d8..52a89cb2481ca 100755 --- a/ec2/spark_ec2.py +++ b/ec2/spark_ec2.py @@ -200,6 +200,7 @@ def get_spark_shark_version(opts): sys.exit(1) return (version, spark_shark_map[version]) + # Attempt to resolve an appropriate AMI given the architecture and # region of the request. def get_spark_ami(opts): @@ -421,11 +422,11 @@ def launch_cluster(conn, opts, cluster_name): # Give the instances descriptive names for master in master_nodes: master.add_tag( - key='Name', + key='Name', value='spark-{cn}-master-{iid}'.format(cn=cluster_name, iid=master.id)) for slave in slave_nodes: slave.add_tag( - key='Name', + key='Name', value='spark-{cn}-slave-{iid}'.format(cn=cluster_name, iid=slave.id)) # Return all the instances From a36eed0b5772f90be9f11fceddf1c6a5618450ff Mon Sep 17 00:00:00 2001 From: Nicholas Chammas Date: Wed, 9 Jul 2014 10:50:07 -0400 Subject: [PATCH 4/5] name ec2 instances and security groups consistently MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Security groups created by spark-ec2 do not prepend “spark-” to the name. Since naming the instances themselves is new to spark-ec2, it’s better to change that pattern to match the existing naming pattern for the security groups, rather than the other way around. 
--- ec2/spark_ec2.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ec2/spark_ec2.py b/ec2/spark_ec2.py index f5c2bfb697c81..64b31b9d2e620 100755 --- a/ec2/spark_ec2.py +++ b/ec2/spark_ec2.py @@ -428,11 +428,11 @@ def launch_cluster(conn, opts, cluster_name): for master in master_nodes: master.add_tag( key='Name', - value='spark-{cn}-master-{iid}'.format(cn=cluster_name, iid=master.id)) + value='{cn}-master-{iid}'.format(cn=cluster_name, iid=master.id)) for slave in slave_nodes: slave.add_tag( key='Name', - value='spark-{cn}-slave-{iid}'.format(cn=cluster_name, iid=slave.id)) + value='{cn}-slave-{iid}'.format(cn=cluster_name, iid=slave.id)) # Return all the instances return (master_nodes, slave_nodes) From f7e45813a3a58ba369d5f21595f671c84f5b91ff Mon Sep 17 00:00:00 2001 From: Nicholas Chammas Date: Wed, 9 Jul 2014 14:26:17 -0400 Subject: [PATCH 5/5] unrelated pep8 fix Functions in Python should be preceded by 2 blank lines, not 1. --- ec2/spark_ec2.py | 1 + 1 file changed, 1 insertion(+) diff --git a/ec2/spark_ec2.py b/ec2/spark_ec2.py index 64b31b9d2e620..44775ea479ece 100755 --- a/ec2/spark_ec2.py +++ b/ec2/spark_ec2.py @@ -699,6 +699,7 @@ def ssh(host, opts, command): time.sleep(30) tries = tries + 1 + # Backported from Python 2.7 for compatiblity with 2.6 (See SPARK-1990) def _check_output(*popenargs, **kwargs): if 'stdout' in kwargs: