OLD | NEW |
| (Empty) |
1 """A LatentSlave that uses EC2 to instantiate the slaves on demand. | |
2 | |
3 Tested with Python boto 1.5c | |
4 """ | |
5 | |
6 # Portions copyright Canonical Ltd. 2009 | |
7 | |
8 import cStringIO | |
9 import os | |
10 import re | |
11 import time | |
12 import urllib | |
13 | |
14 import boto | |
15 import boto.exception | |
16 from twisted.internet import defer, threads | |
17 from twisted.python import log | |
18 | |
19 from buildbot.buildslave import AbstractLatentBuildSlave | |
20 from buildbot import interfaces | |
21 | |
22 PENDING = 'pending' | |
23 RUNNING = 'running' | |
24 SHUTTINGDOWN = 'shutting-down' | |
25 TERMINATED = 'terminated' | |
26 | |
27 class EC2LatentBuildSlave(AbstractLatentBuildSlave): | |
28 | |
29 instance = image = None | |
30 _poll_resolution = 5 # hook point for tests | |
31 | |
32 def __init__(self, name, password, instance_type, ami=None, | |
33 valid_ami_owners=None, valid_ami_location_regex=None, | |
34 elastic_ip=None, identifier=None, secret_identifier=None, | |
35 aws_id_file_path=None, user_data=None, | |
36 keypair_name='latent_buildbot_slave', | |
37 security_name='latent_buildbot_slave', | |
38 max_builds=None, notify_on_missing=[], missing_timeout=60*20, | |
39 build_wait_timeout=60*10, properties={}): | |
40 AbstractLatentBuildSlave.__init__( | |
41 self, name, password, max_builds, notify_on_missing, | |
42 missing_timeout, build_wait_timeout, properties) | |
43 if not ((ami is not None) ^ | |
44 (valid_ami_owners is not None or | |
45 valid_ami_location_regex is not None)): | |
46 raise ValueError( | |
47 'You must provide either a specific ami, or one or both of ' | |
48 'valid_ami_location_regex and valid_ami_owners') | |
49 self.ami = ami | |
50 if valid_ami_owners is not None: | |
51 if isinstance(valid_ami_owners, (int, long)): | |
52 valid_ami_owners = (valid_ami_owners,) | |
53 else: | |
54 for element in valid_ami_owners: | |
55 if not isinstance(element, (int, long)): | |
56 raise ValueError( | |
57 'valid_ami_owners should be int or iterable ' | |
58 'of ints', element) | |
59 if valid_ami_location_regex is not None: | |
60 if not isinstance(valid_ami_location_regex, basestring): | |
61 raise ValueError( | |
62 'valid_ami_location_regex should be a string') | |
63 else: | |
64 # verify that regex will compile | |
65 re.compile(valid_ami_location_regex) | |
66 self.valid_ami_owners = valid_ami_owners | |
67 self.valid_ami_location_regex = valid_ami_location_regex | |
68 self.instance_type = instance_type | |
69 self.keypair_name = keypair_name | |
70 self.security_name = security_name | |
71 self.user_data = user_data | |
72 if identifier is None: | |
73 assert secret_identifier is None, ( | |
74 'supply both or neither of identifier, secret_identifier') | |
75 if aws_id_file_path is None: | |
76 home = os.environ['HOME'] | |
77 aws_id_file_path = os.path.join(home, '.ec2', 'aws_id') | |
78 if not os.path.exists(aws_id_file_path): | |
79 raise ValueError( | |
80 "Please supply your AWS access key identifier and secret " | |
81 "access key identifier either when instantiating this %s " | |
82 "or in the %s file (on two lines).\n" % | |
83 (self.__class__.__name__, aws_id_file_path)) | |
84 aws_file = open(aws_id_file_path, 'r') | |
85 try: | |
86 identifier = aws_file.readline().strip() | |
87 secret_identifier = aws_file.readline().strip() | |
88 finally: | |
89 aws_file.close() | |
90 else: | |
91 assert aws_id_file_path is None, \ | |
92 'if you supply the identifier and secret_identifier, ' \ | |
93 'do not specify the aws_id_file_path' | |
94 assert secret_identifier is not None, \ | |
95 'supply both or neither of identifier, secret_identifier' | |
96 # Make the EC2 connection. | |
97 self.conn = boto.connect_ec2(identifier, secret_identifier) | |
98 | |
99 # Make a keypair | |
100 # | |
101 # We currently discard the keypair data because we don't need it. | |
102 # If we do need it in the future, we will always recreate the keypairs | |
103 # because there is no way to | |
104 # programmatically retrieve the private key component, unless we | |
105 # generate it and store it on the filesystem, which is an unnecessary | |
106 # usage requirement. | |
107 try: | |
108 key_pair = self.conn.get_all_key_pairs(keypair_name)[0] | |
109 # key_pair.delete() # would be used to recreate | |
110 except boto.exception.EC2ResponseError, e: | |
111 if e.code != 'InvalidKeyPair.NotFound': | |
112 if e.code == 'AuthFailure': | |
113 print ('POSSIBLE CAUSES OF ERROR:\n' | |
114 ' Did you sign up for EC2?\n' | |
115 ' Did you put a credit card number in your AWS ' | |
116 'account?\n' | |
117 'Please doublecheck before reporting a problem.\n') | |
118 raise | |
119 # make one; we would always do this, and stash the result, if we | |
120 # needed the key (for instance, to SSH to the box). We'd then | |
121 # use paramiko to use the key to connect. | |
122 self.conn.create_key_pair(keypair_name) | |
123 | |
124 # create security group | |
125 try: | |
126 group = self.conn.get_all_security_groups(security_name)[0] | |
127 except boto.exception.EC2ResponseError, e: | |
128 if e.code == 'InvalidGroup.NotFound': | |
129 self.security_group = self.conn.create_security_group( | |
130 security_name, | |
131 'Authorization to access the buildbot instance.') | |
132 # Authorize the master as necessary | |
133 # TODO this is where we'd open the hole to do the reverse pb | |
134 # connect to the buildbot | |
135 # ip = urllib.urlopen( | |
136 # 'http://checkip.amazonaws.com').read().strip() | |
137 # self.security_group.authorize('tcp', 22, 22, '%s/32' % ip) | |
138 # self.security_group.authorize('tcp', 80, 80, '%s/32' % ip) | |
139 else: | |
140 raise | |
141 | |
142 # get the image | |
143 if self.ami is not None: | |
144 self.image = self.conn.get_image(self.ami) | |
145 else: | |
146 # verify we have access to at least one acceptable image | |
147 discard = self.get_image() | |
148 | |
149 # get the specified elastic IP, if any | |
150 if elastic_ip is not None: | |
151 elastic_ip = self.conn.get_all_addresses([elastic_ip])[0] | |
152 self.elastic_ip = elastic_ip | |
153 | |
154 def get_image(self): | |
155 if self.image is not None: | |
156 return self.image | |
157 if self.valid_ami_location_regex: | |
158 level = 0 | |
159 options = [] | |
160 get_match = re.compile(self.valid_ami_location_regex).match | |
161 for image in self.conn.get_all_images( | |
162 owners=self.valid_ami_owners): | |
163 # gather sorting data | |
164 match = get_match(image.location) | |
165 if match: | |
166 alpha_sort = int_sort = None | |
167 if level < 2: | |
168 try: | |
169 alpha_sort = match.group(1) | |
170 except IndexError: | |
171 level = 2 | |
172 else: | |
173 if level == 0: | |
174 try: | |
175 int_sort = int(alpha_sort) | |
176 except ValueError: | |
177 level = 1 | |
178 options.append([int_sort, alpha_sort, | |
179 image.location, image.id, image]) | |
180 if level: | |
181 log.msg('sorting images at level %d' % level) | |
182 options = [candidate[level:] for candidate in options] | |
183 else: | |
184 options = [(image.location, image.id, image) for image | |
185 in self.conn.get_all_images( | |
186 owners=self.valid_ami_owners)] | |
187 options.sort() | |
188 log.msg('sorted images (last is chosen): %s' % | |
189 (', '.join( | |
190 ['%s (%s)' % (candidate[-1].id, candidate[-1].location) | |
191 for candidate in options]))) | |
192 if not options: | |
193 raise ValueError('no available images match constraints') | |
194 return options[-1][-1] | |
195 | |
196 def dns(self): | |
197 if self.instance is None: | |
198 return None | |
199 return self.instance.public_dns_name | |
200 dns = property(dns) | |
201 | |
202 def start_instance(self): | |
203 if self.instance is not None: | |
204 raise ValueError('instance active') | |
205 return threads.deferToThread(self._start_instance) | |
206 | |
207 def _start_instance(self): | |
208 image = self.get_image() | |
209 reservation = image.run( | |
210 key_name=self.keypair_name, security_groups=[self.security_name], | |
211 instance_type=self.instance_type, user_data=self.user_data) | |
212 self.instance = reservation.instances[0] | |
213 log.msg('%s %s starting instance %s' % | |
214 (self.__class__.__name__, self.slavename, self.instance.id)) | |
215 duration = 0 | |
216 interval = self._poll_resolution | |
217 while self.instance.state == PENDING: | |
218 time.sleep(interval) | |
219 duration += interval | |
220 if duration % 60 == 0: | |
221 log.msg('%s %s has waited %d minutes for instance %s' % | |
222 (self.__class__.__name__, self.slavename, duration//60, | |
223 self.instance.id)) | |
224 self.instance.update() | |
225 if self.instance.state == RUNNING: | |
226 self.output = self.instance.get_console_output() | |
227 minutes = duration//60 | |
228 seconds = duration%60 | |
229 log.msg('%s %s instance %s started on %s ' | |
230 'in about %d minutes %d seconds (%s)' % | |
231 (self.__class__.__name__, self.slavename, | |
232 self.instance.id, self.dns, minutes, seconds, | |
233 self.output.output)) | |
234 if self.elastic_ip is not None: | |
235 self.instance.use_ip(self.elastic_ip) | |
236 return [self.instance.id, | |
237 image.id, | |
238 '%02d:%02d:%02d' % (minutes//60, minutes%60, seconds)] | |
239 else: | |
240 log.msg('%s %s failed to start instance %s (%s)' % | |
241 (self.__class__.__name__, self.slavename, | |
242 self.instance.id, self.instance.state)) | |
243 raise interfaces.LatentBuildSlaveFailedToSubstantiate( | |
244 self.instance.id, self.instance.state) | |
245 | |
246 def stop_instance(self, fast=False): | |
247 if self.instance is None: | |
248 # be gentle. Something may just be trying to alert us that an | |
249 # instance never attached, and it's because, somehow, we never | |
250 # started. | |
251 return defer.succeed(None) | |
252 instance = self.instance | |
253 self.output = self.instance = None | |
254 return threads.deferToThread( | |
255 self._stop_instance, instance, fast) | |
256 | |
257 def _stop_instance(self, instance, fast): | |
258 if self.elastic_ip is not None: | |
259 self.conn.disassociate_address(self.elastic_ip.public_ip) | |
260 instance.update() | |
261 if instance.state not in (SHUTTINGDOWN, TERMINATED): | |
262 instance.stop() | |
263 log.msg('%s %s terminating instance %s' % | |
264 (self.__class__.__name__, self.slavename, instance.id)) | |
265 duration = 0 | |
266 interval = self._poll_resolution | |
267 if fast: | |
268 goal = (SHUTTINGDOWN, TERMINATED) | |
269 instance.update() | |
270 else: | |
271 goal = (TERMINATED,) | |
272 while instance.state not in goal: | |
273 time.sleep(interval) | |
274 duration += interval | |
275 if duration % 60 == 0: | |
276 log.msg( | |
277 '%s %s has waited %d minutes for instance %s to end' % | |
278 (self.__class__.__name__, self.slavename, duration//60, | |
279 instance.id)) | |
280 instance.update() | |
281 log.msg('%s %s instance %s %s ' | |
282 'after about %d minutes %d seconds' % | |
283 (self.__class__.__name__, self.slavename, | |
284 instance.id, goal, duration//60, duration%60)) | |
OLD | NEW |