OLD | NEW |
| (Empty) |
1 | |
2 import os.path | |
3 | |
4 import buildbot | |
5 | |
6 from twisted.spread import pb | |
7 from twisted.python import log | |
8 from twisted.internet import reactor, defer | |
9 from twisted.application import service, internet | |
10 from twisted.cred import credentials | |
11 | |
12 from buildbot.util import now | |
13 from buildbot.pbutil import ReconnectingPBClientFactory | |
14 from buildbot.slave import registry | |
15 # make sure the standard commands get registered. This import is performed | |
16 # for its side-effects. | |
17 from buildbot.slave import commands | |
18 # and make pyflakes think we aren't being stupid | |
19 commands = commands | |
20 | |
class NoCommandRunning(pb.Error):
    """PB-transportable error type.

    NOTE(review): nothing in the visible part of this module raises it;
    judging by the name it is meant for requests that refer to a command
    when none is running -- confirm against the callers.
    """
class WrongCommandRunning(pb.Error):
    """PB-transportable error type.

    NOTE(review): nothing in the visible part of this module raises it;
    judging by the name it is meant for requests that refer to a command
    other than the one currently running -- confirm against the callers.
    """
class UnknownCommand(pb.Error):
    """Raised by SlaveBuilder.remote_startCommand when the requested command
    name has no entry in registry.commandRegistry."""
27 | |
class Master:
    """Plain record of the four values given to the constructor: host,
    port, username and password. It has no behavior of its own."""

    def __init__(self, host, port, username, password):
        # where the values came from is up to the caller; we only store them
        self.host, self.port = host, port
        self.username, self.password = username, password
34 | |
class SlaveBuild:

    """Per-build scratch object: SlaveBuilder.remote_startBuild creates a
    fresh one for each build so that state can be carried from one step to
    the next. All SlaveCommands have access to it.
    """

    def __init__(self, builder):
        # keep a reference back to the object that created us
        setattr(self, "builder", builder)
42 | |
class SlaveBuilder(pb.Referenceable, service.Service):

    """This is the local representation of a single Builder: it handles a
    single kind of build (like an all-warnings build). It has a name and a
    home directory. The rest of its behavior is determined by the master.
    """

    # when true, stopService() and lostRemoteStep() interrupt and forget
    # whatever command is currently running
    stopCommandOnShutdown = True

    # remote is a ref to the Builder object on the master side, and is set
    # when they attach. We use it to detect when the connection to the master
    # is severed.
    remote = None

    # .build points to a SlaveBuild object, a new one for each build
    build = None

    # .command points to a SlaveCommand instance, and is set while the step
    # is running. We use it to implement the stopBuild method.
    command = None

    # .remoteStep is a ref to the master-side BuildStep object, and is set
    # when the step is started
    remoteStep = None

    # .bot is our parent service, assigned by setServiceParent(). The
    # class-level default lets remote_ping() test 'self.bot' safely even if
    # we have not been attached to a parent yet.
    bot = None

    def __init__(self, name, not_really):
        """Set the service name and remember the 'not_really' flag.

        'not_really' is only stored here; this class never reads it
        (presumably the commands we spawn do -- confirm).
        """
        #service.Service.__init__(self) # Service has no __init__ method
        self.setName(name)
        self.not_really = not_really

    def __repr__(self):
        return "<SlaveBuilder '%s' at %d>" % (self.name, id(self))

    def setServiceParent(self, parent):
        """Attach to 'parent' and remember it as self.bot."""
        service.Service.setServiceParent(self, parent)
        self.bot = self.parent
        # note that self.parent will go away when the buildmaster's config
        # file changes and this Builder is removed (possibly because it has
        # been changed, so the Builder will be re-added again in a moment).
        # This may occur during a build, while a step is running.

    def setBuilddir(self, builddir):
        """Record 'builddir' and make sure the matching directory under the
        bot's basedir exists, creating it if necessary. We must already have
        a service parent."""
        assert self.parent
        self.builddir = builddir
        self.basedir = os.path.join(self.bot.basedir, self.builddir)
        if not os.path.isdir(self.basedir):
            os.makedirs(self.basedir)

    def stopService(self):
        """Stop the service, and (if stopCommandOnShutdown is set) halt any
        command that is still running."""
        service.Service.stopService(self)
        if self.stopCommandOnShutdown:
            self.stopCommand()

    def activity(self):
        """Forward an 'activity' notification to the BotFactory, reached via
        our parent's parent (.bf). Does nothing if either parent is
        missing."""
        bot = self.parent
        if bot:
            buildslave = bot.parent
            if buildslave:
                bf = buildslave.bf
                bf.activity()

    def remote_setMaster(self, remote):
        """Remember the master-side reference and ask to be told (via
        lostRemote) when it disconnects."""
        self.remote = remote
        self.remote.notifyOnDisconnect(self.lostRemote)

    def remote_print(self, message):
        """Log a message from the master. The special message "ping" is
        additionally handled by remote_ping(), whose result is returned."""
        log.msg("SlaveBuilder.remote_print(%s): message from master: %s" %
                (self.name, message))
        if message == "ping":
            return self.remote_ping()

    def remote_ping(self):
        """Answer a ping from the master.

        Normally this just logs and returns None. Two debug options, read
        from the debugOpts dictionary of our bot's parent, change that:
        'stallPings' makes us return a Deferred that fires after the
        configured timeout, and 'failPingOnce' makes us raise a pb.Error
        subclass a single time.
        """
        log.msg("SlaveBuilder.remote_ping(%s)" % self)
        if self.bot and self.bot.parent:
            debugOpts = self.bot.parent.debugOpts
            if debugOpts.get("stallPings"):
                log.msg(" debug_stallPings")
                timeout, timers = debugOpts["stallPings"]
                d = defer.Deferred()
                t = reactor.callLater(timeout, d.callback, None)
                # stash the DelayedCall so whoever set the option can cancel
                # it later
                timers.append(t)
                return d
            if debugOpts.get("failPingOnce"):
                log.msg(" debug_failPingOnce")
                class FailPingError(pb.Error): pass
                # remove the option first, so only this one ping fails
                del debugOpts['failPingOnce']
                raise FailPingError("debug_failPingOnce means we should fail")

    def lostRemote(self, remote):
        """Disconnect callback registered in remote_setMaster."""
        log.msg("lost remote")
        self.remote = None

    def lostRemoteStep(self, remotestep):
        """Disconnect callback registered in remote_startCommand: forget the
        step and (if stopCommandOnShutdown is set) halt its command."""
        log.msg("lost remote step")
        self.remoteStep = None
        if self.stopCommandOnShutdown:
            self.stopCommand()

    # the following are Commands that can be invoked by the master-side
    # Builder
    def remote_startBuild(self):
        """This is invoked before the first step of any new build is run. It
        creates a new SlaveBuild object, which holds slave-side state from
        one step to the next."""
        self.build = SlaveBuild(self)
        log.msg("%s.startBuild" % self)

    def remote_startCommand(self, stepref, stepId, command, args):
        """
        This gets invoked by L{buildbot.process.step.RemoteCommand.start}, as
        part of various master-side BuildSteps, to start various commands
        that actually do the build. I return nothing. Eventually I will call
        .commandComplete() to notify the master-side RemoteCommand that I'm
        done.

        Raises UnknownCommand if 'command' is not in the command registry.
        """

        self.activity()

        if self.command:
            log.msg("leftover command, dropping it")
            self.stopCommand()

        try:
            factory, version = registry.commandRegistry[command]
        except KeyError:
            # call form of 'raise': same behavior as the old
            # 'raise cls, msg' statement, but valid in every Python version
            raise UnknownCommand("unrecognized SlaveCommand '%s'" % command)
        self.command = factory(self, stepId, args)

        log.msg(" startCommand:%s [id %s]" % (command,stepId))
        self.remoteStep = stepref
        self.remoteStep.notifyOnDisconnect(self.lostRemoteStep)
        d = self.command.doStart()
        # discard the command's result: commandComplete() treats any true
        # argument as a failure
        d.addCallback(lambda res: None)
        d.addBoth(self.commandComplete)
        return None

    def remote_interruptCommand(self, stepId, why):
        """Halt the current step."""
        log.msg("asked to interrupt current command: %s" % why)
        self.activity()
        if not self.command:
            # TODO: just log it, a race could result in their interrupting a
            # command that wasn't actually running
            log.msg(" .. but none was running")
            return
        self.command.doInterrupt()


    def stopCommand(self):
        """Make any currently-running command die, with no further status
        output. This is used when the buildslave is shutting down or the
        connection to the master has been lost. Interrupt the command,
        silence it, and then forget about it."""
        if not self.command:
            return
        log.msg("stopCommand: halting current command %s" % self.command)
        self.command.doInterrupt() # shut up! and die!
        self.command = None # forget you!

    # sendUpdate is invoked by the Commands we spawn
    def sendUpdate(self, data):
        """This sends the status update to the master-side
        L{buildbot.process.step.RemoteCommand} object, giving it a sequence
        number in the process. It adds the update to a queue, and asks the
        master to acknowledge the update so it can be removed from that
        queue."""

        if not self.running:
            # .running comes from service.Service, and says whether the
            # service is running or not. If we aren't running, don't send any
            # status messages.
            return
        # the update[1]=0 comes from the leftover 'updateNum', which the
        # master still expects to receive. Provide it to avoid significant
        # interoperability issues between new slaves and old masters.
        if self.remoteStep:
            update = [data, 0]
            updates = [update]
            d = self.remoteStep.callRemote("update", updates)
            d.addCallback(self.ackUpdate)
            d.addErrback(self._ackFailed, "SlaveBuilder.sendUpdate")

    def ackUpdate(self, acknum):
        self.activity() # update the "last activity" timer

    def ackComplete(self, dummy):
        self.activity() # update the "last activity" timer

    def _ackFailed(self, why, where):
        """Errback for the 'update' and 'complete' remote calls: just log
        which call failed."""
        log.msg("SlaveBuilder._ackFailed:", where)
        #log.err(why) # we don't really care


    # this is fired by the Deferred attached to each Command
    def commandComplete(self, failure):
        """Report the end of the current command to the master-side step.

        'failure' is None on success, or a failure.Failure if the command
        failed. Nothing is sent when the service is no longer running or the
        remote step is already gone.
        """
        if failure:
            log.msg("SlaveBuilder.commandFailed", self.command)
            log.err(failure)
            # failure, if present, is a failure.Failure. To send it across
            # the wire, we must turn it into a pb.CopyableFailure.
            failure = pb.CopyableFailure(failure)
            failure.unsafeTracebacks = True
        else:
            # failure is None
            log.msg("SlaveBuilder.commandComplete", self.command)
        self.command = None
        if not self.running:
            log.msg(" but we weren't running, quitting silently")
            return
        if self.remoteStep:
            self.remoteStep.dontNotifyOnDisconnect(self.lostRemoteStep)
            d = self.remoteStep.callRemote("complete", failure)
            d.addCallback(self.ackComplete)
            d.addErrback(self._ackFailed, "sendComplete")
            self.remoteStep = None


    def remote_shutdown(self):
        """Stop the reactor at the master's request."""
        # parenthesized single-string form prints the same text under the
        # Python 2 print statement and the Python 3 print function
        print("slave shutting down on command from master")
        reactor.stop()
262 | |
263 | |
class Bot(pb.Referenceable, service.MultiService):
    """I represent the slave-side bot."""
    usePTY = None
    name = "bot"

    def __init__(self, basedir, usePTY, not_really=0):
        """Remember the base directory and the usePTY / not_really settings
        (both are handed on to every SlaveBuilder we create). We start out
        with no builders."""
        service.MultiService.__init__(self)
        self.basedir = basedir
        self.usePTY = usePTY
        self.not_really = not_really
        # maps builder name -> SlaveBuilder
        self.builders = {}

    def startService(self):
        assert os.path.isdir(self.basedir)
        service.MultiService.startService(self)

    def remote_getDirs(self):
        """Return the names of the subdirectories of our base directory."""
        # os.listdir() returns bare names, so they must be joined with
        # basedir before testing them: otherwise the check is made relative
        # to the current working directory.
        return [d for d in os.listdir(self.basedir)
                if os.path.isdir(os.path.join(self.basedir, d))]

    def remote_getCommands(self):
        """Return a dictionary mapping each registered command name to its
        version."""
        versions = {}
        for name, (factory, version) in registry.commandRegistry.items():
            versions[name] = version
        return versions

    def remote_setBuilderList(self, wanted):
        """Make our set of SlaveBuilders match 'wanted'.

        'wanted' is a sequence of (name, builddir) pairs. Builders we already
        have are kept (and moved if their builddir changed), missing ones are
        created, and builders that are no longer wanted are removed.
        Directories under basedir that no wanted builder uses are only
        reported in the log, never deleted.

        Returns a dictionary mapping each wanted name to its SlaveBuilder.
        """
        retval = {}
        wanted_dirs = ["info"]
        # collected once here instead of being rebuilt for every existing
        # builder in the removal loop below
        wanted_names = []
        for (name, builddir) in wanted:
            wanted_names.append(name)
            wanted_dirs.append(builddir)
            b = self.builders.get(name, None)
            if b:
                if b.builddir != builddir:
                    log.msg("changing builddir for builder %s from %s to %s" \
                            % (name, b.builddir, builddir))
                    b.setBuilddir(builddir)
            else:
                b = SlaveBuilder(name, self.not_really)
                b.usePTY = self.usePTY
                b.setServiceParent(self)
                b.setBuilddir(builddir)
                self.builders[name] = b
            retval[name] = b
        # iterate over a snapshot of the keys, since entries are deleted
        # inside the loop
        for name in list(self.builders.keys()):
            if name not in wanted_names:
                log.msg("removing old builder %s" % name)
                self.builders[name].disownServiceParent()
                del self.builders[name]

        for d in os.listdir(self.basedir):
            # 'd' is a bare name: test it relative to basedir, not relative
            # to the current working directory
            if os.path.isdir(os.path.join(self.basedir, d)):
                if d not in wanted_dirs:
                    log.msg("I have a leftover directory '%s' that is not "
                            "being used by the buildmaster: you can delete "
                            "it now" % d)
        return retval

    def remote_print(self, message):
        log.msg("message from master:", message)

    def remote_getSlaveInfo(self):
        """This command retrieves data from the files in SLAVEDIR/info/* and
        sends the contents to the buildmaster. These are used to describe
        the slave and its configuration, and should be created and
        maintained by the slave administrator. They will be retrieved each
        time the master-slave connection is established.

        Returns a dictionary mapping each file name to that file's contents;
        it is empty if there is no info directory.
        """

        files = {}
        basedir = os.path.join(self.basedir, "info")
        if not os.path.isdir(basedir):
            return files
        for name in os.listdir(basedir):
            filename = os.path.join(basedir, name)
            if os.path.isfile(filename):
                f = open(filename, "r")
                # close the handle explicitly instead of leaving it to the
                # garbage collector
                try:
                    files[name] = f.read()
                finally:
                    f.close()
        return files

    def remote_getVersion(self):
        """Send our version back to the Master"""
        return buildbot.version
345 | |
346 | |
347 | |
class BotFactory(ReconnectingPBClientFactory):
    """Reconnecting PB client factory that also runs the application-level
    keepalive: it sends a periodic 'keepalive' request to the master and
    drops the connection when nothing has been heard for a whole
    keepaliveInterval."""

    # 'keepaliveInterval' serves two purposes. The first is to keep the
    # connection alive: it guarantees that there will be at least some
    # traffic once every 'keepaliveInterval' seconds, which may help keep an
    # interposed NAT gateway from dropping the address mapping because it
    # thinks the connection has been abandoned. The second is to put an upper
    # limit on how long the buildmaster might have gone away before we notice
    # it. For this second purpose, we insist upon seeing *some* evidence of
    # the buildmaster at least once every 'keepaliveInterval' seconds.
    keepaliveInterval = None # None = do not use keepalives

    # 'keepaliveTimeout' seconds before the interval expires, we will send a
    # keepalive request, both to add some traffic to the connection, and to
    # prompt a response from the master in case all our builders are idle. We
    # don't insist upon receiving a timely response from this message: a slow
    # link might put the request at the wrong end of a large build message.
    keepaliveTimeout = 30 # how long we will go without a response

    # 'maxDelay' determines the maximum amount of time the slave will wait
    # between connection retries
    maxDelay = 300

    # pending DelayedCalls for doKeepalive / checkActivity, or None
    keepaliveTimer = None
    activityTimer = None
    # value of now() the last time activity() was called
    lastActivity = 0
    unsafeTracebacks = 1
    # set in gotPerspective, cleared again in clientConnectionLost
    perspective = None

    def __init__(self, keepaliveInterval, keepaliveTimeout, maxDelay):
        ReconnectingPBClientFactory.__init__(self)
        self.maxDelay = maxDelay
        self.keepaliveInterval = keepaliveInterval
        self.keepaliveTimeout = keepaliveTimeout

    def startedConnecting(self, connector):
        ReconnectingPBClientFactory.startedConnecting(self, connector)
        self.connector = connector

    def gotPerspective(self, perspective):
        """Remember the perspective, try to turn on TCP-level keepalives,
        and start the application-level keepalive timers if an interval is
        in effect."""
        ReconnectingPBClientFactory.gotPerspective(self, perspective)
        self.perspective = perspective
        try:
            perspective.broker.transport.setTcpKeepAlive(1)
        except Exception:
            # best effort only. Catching Exception rather than using a bare
            # 'except:' keeps us from swallowing things like
            # KeyboardInterrupt on Pythons where those are not Exceptions.
            log.msg("unable to set SO_KEEPALIVE")
            # without SO_KEEPALIVE, fall back to application-level
            # keepalives every ten minutes if none were configured
            if not self.keepaliveInterval:
                self.keepaliveInterval = 10*60
        self.activity()
        if self.keepaliveInterval:
            log.msg("sending application-level keepalives every %d seconds" \
                    % self.keepaliveInterval)
            self.startTimers()

    def clientConnectionFailed(self, connector, reason):
        self.connector = None
        ReconnectingPBClientFactory.clientConnectionFailed(self,
                                                           connector, reason)

    def clientConnectionLost(self, connector, reason):
        self.connector = None
        self.stopTimers()
        self.perspective = None
        ReconnectingPBClientFactory.clientConnectionLost(self,
                                                         connector, reason)

    def startTimers(self):
        """Schedule the next keepalive request and the next activity check.
        Neither timer may already be pending."""
        assert self.keepaliveInterval
        assert not self.keepaliveTimer
        assert not self.activityTimer
        # Insist that doKeepalive fires before checkActivity. Really, it
        # needs to happen at least one RTT beforehand.
        assert self.keepaliveInterval > self.keepaliveTimeout

        # arrange to send a keepalive a little while before our deadline
        when = self.keepaliveInterval - self.keepaliveTimeout
        self.keepaliveTimer = reactor.callLater(when, self.doKeepalive)
        # and check for activity too
        self.activityTimer = reactor.callLater(self.keepaliveInterval,
                                               self.checkActivity)

    def stopTimers(self):
        """Cancel whichever of the two timers are still pending."""
        if self.keepaliveTimer:
            self.keepaliveTimer.cancel()
            self.keepaliveTimer = None
        if self.activityTimer:
            self.activityTimer.cancel()
            self.activityTimer = None

    def activity(self, res=None):
        """Note that we have just seen evidence of the master. 'res' is
        ignored; it is accepted so this can be used as a Deferred
        callback."""
        self.lastActivity = now()

    def doKeepalive(self):
        # send the keepalive request. If it fails outright, the connection
        # was already dropped, so just log and ignore.
        self.keepaliveTimer = None
        log.msg("sending app-level keepalive")
        d = self.perspective.callRemote("keepalive")
        d.addCallback(self.activity)
        d.addErrback(self.keepaliveLost)

    def keepaliveLost(self, f):
        log.msg("BotFactory.keepaliveLost")

    def checkActivity(self):
        """Drop the connection if nothing has been heard from the master
        for more than keepaliveInterval seconds; otherwise re-arm the
        timers."""
        self.activityTimer = None
        # sample the clock once, so the comparison and the logged figure
        # are based on the same instant
        current = now()
        if self.lastActivity + self.keepaliveInterval < current:
            log.msg("BotFactory.checkActivity: nothing from master for "
                    "%d secs" % (current - self.lastActivity))
            self.perspective.broker.transport.loseConnection()
            return
        self.startTimers()

    def stopFactory(self):
        ReconnectingPBClientFactory.stopFactory(self)
        self.stopTimers()
463 | |
464 | |
class BuildSlave(service.MultiService):
    """Top-level slave service: it owns the Bot, the BotFactory that logs
    in to the buildmaster, and the TCP client connection that carries the
    factory."""

    botClass = Bot

    # debugOpts is a dictionary used during unit tests.

    # debugOpts['stallPings'] can be set to a tuple of (timeout, []). Any
    # calls to SlaveBuilder.remote_ping (which includes remote_print("ping"))
    # will stall for 'timeout' seconds before returning. The DelayedCalls
    # used to implement this are stashed in the list so they can be
    # cancelled later.

    # debugOpts['failPingOnce'] can be set to True to make the slaveping fail
    # exactly once.

    def __init__(self, buildmaster_host, port, name, passwd, basedir,
                 keepalive, usePTY, keepaliveTimeout=30, umask=None,
                 maxdelay=300, debugOpts=None):
        """Build the bot, the login factory and the TCP client service.

        'keepalive' is the application-level keepalive interval handed to
        BotFactory; 0 is turned into None. 'umask', if not None, is applied
        with os.umask() when the service starts. 'debugOpts' is copied, so
        the caller's dictionary is never modified; None means no debug
        options.
        """
        log.msg("Creating BuildSlave -- buildbot.version: %s" % buildbot.version)
        service.MultiService.__init__(self)
        # None instead of a shared mutable {} default
        if debugOpts is None:
            debugOpts = {}
        self.debugOpts = debugOpts.copy()
        bot = self.botClass(basedir, usePTY)
        bot.setServiceParent(self)
        self.bot = bot
        if keepalive == 0:
            keepalive = None
        self.umask = umask
        bf = self.bf = BotFactory(keepalive, keepaliveTimeout, maxdelay)
        bf.startLogin(credentials.UsernamePassword(name, passwd), client=bot)
        self.connection = c = internet.TCPClient(buildmaster_host, port, bf)
        c.setServiceParent(self)

    def waitUntilDisconnected(self):
        # utility method for testing. Returns a Deferred that will fire when
        # we lose the connection to the master.
        if not self.bf.perspective:
            return defer.succeed(None)
        d = defer.Deferred()
        self.bf.perspective.notifyOnDisconnect(lambda res: d.callback(None))
        return d

    def startService(self):
        """Apply the configured umask (if any), then start the child
        services."""
        if self.umask is not None:
            os.umask(self.umask)
        service.MultiService.startService(self)

    def stopService(self):
        """Stop reconnecting, stop the child services, and make sure the
        TCP connection is torn down."""
        self.bf.continueTrying = 0
        self.bf.stopTrying()
        service.MultiService.stopService(self)
        # now kill the TCP connection
        # twisted >2.0.1 does this for us, and leaves _connection=None
        if self.connection._connection:
            self.connection._connection.disconnect()
OLD | NEW |