Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(132)

Side by Side Diff: third_party/buildbot_7_12/buildbot/slave/bot.py

Issue 12207158: Bye bye buildbot 0.7.12. (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/tools/build
Patch Set: Created 7 years, 10 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
(Empty)
1
import os.path

import buildbot

from twisted.spread import pb
from twisted.python import log
from twisted.internet import reactor, defer
from twisted.application import service, internet
from twisted.cred import credentials

from buildbot.util import now
from buildbot.pbutil import ReconnectingPBClientFactory
from buildbot.slave import registry
# make sure the standard commands get registered. This import is performed
# for its side-effects.
from buildbot.slave import commands
# and make pyflakes think we aren't being stupid
commands = commands
20
class NoCommandRunning(pb.Error):
    """Perspective Broker error type for the no-command-running condition.

    Nothing in this file raises it; it is only declared here.
    """
    pass
class WrongCommandRunning(pb.Error):
    """Perspective Broker error type for the wrong-command-running condition.

    Nothing in this file raises it; it is only declared here.
    """
    pass
class UnknownCommand(pb.Error):
    """Raised by SlaveBuilder.remote_startCommand when the requested command
    name is not present in registry.commandRegistry.
    """
    pass
27
class Master:
    """Plain record of the connection details for one buildmaster.

    The four constructor arguments are stored unchanged as attributes of the
    same name; no validation or conversion is performed.
    """

    def __init__(self, host, port, username, password):
        self.host = host
        self.port = port
        self.username = username
        self.password = password
34
class SlaveBuild:

    """This is an object that can hold state from one step to another in the
    same build. All SlaveCommands have access to it.

    The only attribute set here is .builder, the object passed to the
    constructor (SlaveBuilder.remote_startBuild passes itself).
    """

    def __init__(self, builder):
        self.builder = builder
42
class SlaveBuilder(pb.Referenceable, service.Service):

    """This is the local representation of a single Builder: it handles a
    single kind of build (like an all-warnings build). It has a name and a
    home directory. The rest of its behavior is determined by the master.
    """

    # When true, stopService() and lostRemoteStep() also call stopCommand()
    # to interrupt and forget whatever command is currently running.
    stopCommandOnShutdown = True

    # remote is a ref to the Builder object on the master side, and is set
    # when they attach. We use it to detect when the connection to the master
    # is severed.
    remote = None

    # .build points to a SlaveBuild object, a new one for each build
    build = None

    # .command points to a SlaveCommand instance, and is set while the step
    # is running. We use it to implement the stopBuild method.
    command = None

    # .remoteStep is a ref to the master-side BuildStep object, and is set
    # when the step is started
    remoteStep = None

    def __init__(self, name, not_really):
        """Set the service name and remember the not_really flag.

        not_really is only stored; no method of this class reads it.
        """
        #service.Service.__init__(self) # Service has no __init__ method
        self.setName(name)
        self.not_really = not_really

    def __repr__(self):
        return "<SlaveBuilder '%s' at %d>" % (self.name, id(self))

    def setServiceParent(self, parent):
        """Attach to the parent service and keep a reference to it as .bot."""
        service.Service.setServiceParent(self, parent)
        self.bot = self.parent
        # note that self.parent will go away when the buildmaster's config
        # file changes and this Builder is removed (possibly because it has
        # been changed, so the Builder will be re-added again in a moment).
        # This may occur during a build, while a step is running.

    def setBuilddir(self, builddir):
        """Record builddir and make sure the directory exists.

        The working directory (.basedir) is builddir joined onto the bot's
        basedir; it is created, including parents, when missing. Must be
        called after setServiceParent(), because it reads self.bot.
        """
        assert self.parent
        self.builddir = builddir
        self.basedir = os.path.join(self.bot.basedir, self.builddir)
        if not os.path.isdir(self.basedir):
            os.makedirs(self.basedir)

    def stopService(self):
        """Stop the service and, if stopCommandOnShutdown is set, the
        currently-running command as well."""
        service.Service.stopService(self)
        if self.stopCommandOnShutdown:
            self.stopCommand()

    def activity(self):
        """Forward an "activity seen" notification to the BotFactory.

        Walks parent (the bot) -> its parent (the buildslave) -> .bf and
        calls bf.activity(). Does nothing if either parent is missing.
        """
        bot = self.parent
        if bot:
            buildslave = bot.parent
            if buildslave:
                bf = buildslave.bf
                bf.activity()

    def remote_setMaster(self, remote):
        """Store the master-side reference and ask to be told (via
        lostRemote) when it disconnects."""
        self.remote = remote
        self.remote.notifyOnDisconnect(self.lostRemote)

    def remote_print(self, message):
        """Log a message from the master; the literal message "ping" is
        additionally answered by calling remote_ping()."""
        log.msg("SlaveBuilder.remote_print(%s): message from master: %s" %
                (self.name, message))
        if message == "ping":
            return self.remote_ping()

    def remote_ping(self):
        """Answer a ping from the master.

        Normally returns None. Two test hooks are read from the buildslave's
        debugOpts dictionary (self.bot.parent.debugOpts):

        - "stallPings": a (timeout, timers) pair. A Deferred that fires
          after 'timeout' seconds is returned, and the DelayedCall is
          appended to 'timers'.
        - "failPingOnce": the key is deleted and a pb.Error subclass is
          raised, so only the first ping fails.
        """
        log.msg("SlaveBuilder.remote_ping(%s)" % self)
        if self.bot and self.bot.parent:
            debugOpts = self.bot.parent.debugOpts
            if debugOpts.get("stallPings"):
                log.msg(" debug_stallPings")
                timeout, timers = debugOpts["stallPings"]
                d = defer.Deferred()
                t = reactor.callLater(timeout, d.callback, None)
                timers.append(t)
                return d
            if debugOpts.get("failPingOnce"):
                log.msg(" debug_failPingOnce")
                class FailPingError(pb.Error): pass
                del debugOpts['failPingOnce']
                raise FailPingError("debug_failPingOnce means we should fail")

    def lostRemote(self, remote):
        """Disconnect callback registered by remote_setMaster()."""
        log.msg("lost remote")
        self.remote = None

    def lostRemoteStep(self, remotestep):
        """Disconnect callback registered by remote_startCommand(): forget
        the step and, if stopCommandOnShutdown is set, stop its command."""
        log.msg("lost remote step")
        self.remoteStep = None
        if self.stopCommandOnShutdown:
            self.stopCommand()

    # the following are Commands that can be invoked by the master-side
    # Builder
    def remote_startBuild(self):
        """This is invoked before the first step of any new build is run. It
        creates a new SlaveBuild object, which holds slave-side state from
        one step to the next."""
        self.build = SlaveBuild(self)
        log.msg("%s.startBuild" % self)

    def remote_startCommand(self, stepref, stepId, command, args):
        """
        This gets invoked by L{buildbot.process.step.RemoteCommand.start}, as
        part of various master-side BuildSteps, to start various commands
        that actually do the build. I return nothing. Eventually I will call
        .commandComplete() to notify the master-side RemoteCommand that I'm
        done.

        Raises UnknownCommand if 'command' is not a key of
        registry.commandRegistry.
        """

        self.activity()

        if self.command:
            log.msg("leftover command, dropping it")
            self.stopCommand()

        try:
            # registry entries are (factory, version) pairs; only the
            # factory is needed here
            factory = registry.commandRegistry[command][0]
        except KeyError:
            raise UnknownCommand("unrecognized SlaveCommand '%s'" % command)
        self.command = factory(self, stepId, args)

        log.msg(" startCommand:%s [id %s]" % (command,stepId))
        self.remoteStep = stepref
        self.remoteStep.notifyOnDisconnect(self.lostRemoteStep)
        d = self.command.doStart()
        # discard the command's result so commandComplete() sees None on
        # success and a Failure on error
        d.addCallback(lambda res: None)
        d.addBoth(self.commandComplete)
        return None

    def remote_interruptCommand(self, stepId, why):
        """Halt the current step."""
        log.msg("asked to interrupt current command: %s" % why)
        self.activity()
        if not self.command:
            # TODO: just log it, a race could result in their interrupting a
            # command that wasn't actually running
            log.msg(" .. but none was running")
            return
        self.command.doInterrupt()

    def stopCommand(self):
        """Make any currently-running command die, with no further status
        output. This is used when the buildslave is shutting down or the
        connection to the master has been lost. Interrupt the command,
        silence it, and then forget about it."""
        if not self.command:
            return
        log.msg("stopCommand: halting current command %s" % self.command)
        self.command.doInterrupt() # shut up! and die!
        self.command = None # forget you!

    # sendUpdate is invoked by the Commands we spawn
    def sendUpdate(self, data):
        """This sends the status update to the master-side
        L{buildbot.process.step.RemoteCommand} object, giving it a sequence
        number in the process. It adds the update to a queue, and asks the
        master to acknowledge the update so it can be removed from that
        queue."""

        if not self.running:
            # .running comes from service.Service, and says whether the
            # service is running or not. If we aren't running, don't send any
            # status messages.
            return
        # the update[1]=0 comes from the leftover 'updateNum', which the
        # master still expects to receive. Provide it to avoid significant
        # interoperability issues between new slaves and old masters.
        if self.remoteStep:
            update = [data, 0]
            updates = [update]
            d = self.remoteStep.callRemote("update", updates)
            d.addCallback(self.ackUpdate)
            d.addErrback(self._ackFailed, "SlaveBuilder.sendUpdate")

    def ackUpdate(self, acknum):
        """Callback for a delivered "update" call; acknum is ignored."""
        self.activity()   # update the "last activity" timer

    def ackComplete(self, dummy):
        """Callback for a delivered "complete" call; the result is ignored."""
        self.activity()   # update the "last activity" timer

    def _ackFailed(self, why, where):
        """Errback for failed remote calls: log where it happened and
        swallow the failure."""
        log.msg("SlaveBuilder._ackFailed:", where)
        #log.err(why) # we don't really care

    # this is fired by the Deferred attached to each Command
    def commandComplete(self, failure):
        """Report the end of the current command to the master-side step.

        'failure' is None on success, or a failure.Failure if the command
        failed. The command is forgotten in both cases. Nothing is sent if
        the service is no longer running or there is no remoteStep.
        """
        if failure:
            log.msg("SlaveBuilder.commandFailed", self.command)
            log.err(failure)
            # failure, if present, is a failure.Failure. To send it across
            # the wire, we must turn it into a pb.CopyableFailure.
            failure = pb.CopyableFailure(failure)
            failure.unsafeTracebacks = True
        else:
            # failure is None
            log.msg("SlaveBuilder.commandComplete", self.command)
        self.command = None
        if not self.running:
            log.msg(" but we weren't running, quitting silently")
            return
        if self.remoteStep:
            self.remoteStep.dontNotifyOnDisconnect(self.lostRemoteStep)
            d = self.remoteStep.callRemote("complete", failure)
            d.addCallback(self.ackComplete)
            d.addErrback(self._ackFailed, "sendComplete")
            self.remoteStep = None

    def remote_shutdown(self):
        """Stop the reactor on request from the master."""
        # print() with a single string argument emits the same text whether
        # or not print is a statement in the running interpreter
        print("slave shutting down on command from master")
        reactor.stop()
262
263
class Bot(pb.Referenceable, service.MultiService):
    """I represent the slave-side bot."""
    usePTY = None
    name = "bot"

    def __init__(self, basedir, usePTY, not_really=0):
        """Remember the base directory and the flags handed on to every
        SlaveBuilder created by remote_setBuilderList()."""
        service.MultiService.__init__(self)
        self.basedir = basedir
        self.usePTY = usePTY
        self.not_really = not_really
        # maps builder name -> SlaveBuilder
        self.builders = {}

    def startService(self):
        assert os.path.isdir(self.basedir)
        service.MultiService.startService(self)

    def remote_getDirs(self):
        """Return the names of the subdirectories of self.basedir."""
        # os.listdir() yields bare names, so each one has to be joined back
        # onto basedir before testing it; testing the bare name would look
        # it up relative to the process's current directory instead.
        return [d for d in os.listdir(self.basedir)
                if os.path.isdir(os.path.join(self.basedir, d))]

    def remote_getCommands(self):
        """Return a dict mapping each registered command name to its
        version, taken from registry.commandRegistry."""
        commands = {}
        for name, (factory, version) in registry.commandRegistry.items():
            commands[name] = version
        return commands

    def remote_setBuilderList(self, wanted):
        """Make self.builders match the master's list of builders.

        'wanted' is a sequence of (name, builddir) pairs. Builders that
        already exist are kept (and moved if their builddir changed), new
        ones are created as child services, and builders whose name is no
        longer wanted are disowned and dropped. Leftover directories under
        basedir are only reported in the log, never deleted.

        Returns a dict mapping each wanted name to its SlaveBuilder.
        """
        retval = {}
        wanted_names = []
        wanted_dirs = ["info"]
        for (name, builddir) in wanted:
            wanted_names.append(name)
            wanted_dirs.append(builddir)
            b = self.builders.get(name, None)
            if b:
                if b.builddir != builddir:
                    log.msg("changing builddir for builder %s from %s to %s" \
                            % (name, b.builddir, builddir))
                    b.setBuilddir(builddir)
            else:
                b = SlaveBuilder(name, self.not_really)
                b.usePTY = self.usePTY
                b.setServiceParent(self)
                b.setBuilddir(builddir)
                self.builders[name] = b
            retval[name] = b

        # iterate over a snapshot of the keys: entries are deleted from
        # self.builders inside the loop
        for name in list(self.builders.keys()):
            if name not in wanted_names:
                log.msg("removing old builder %s" % name)
                self.builders[name].disownServiceParent()
                del self.builders[name]

        for d in os.listdir(self.basedir):
            # join with basedir so the check does not depend on the
            # process's current directory
            if os.path.isdir(os.path.join(self.basedir, d)):
                if d not in wanted_dirs:
                    log.msg("I have a leftover directory '%s' that is not "
                            "being used by the buildmaster: you can delete "
                            "it now" % d)
        return retval

    def remote_print(self, message):
        log.msg("message from master:", message)

    def remote_getSlaveInfo(self):
        """This command retrieves data from the files in SLAVEDIR/info/* and
        sends the contents to the buildmaster. These are used to describe
        the slave and its configuration, and should be created and
        maintained by the slave administrator. They will be retrieved each
        time the master-slave connection is established.

        Returns a dict mapping file name to file contents; the dict is empty
        if the info directory does not exist. Subdirectories are skipped.
        """

        files = {}
        basedir = os.path.join(self.basedir, "info")
        if not os.path.isdir(basedir):
            return files
        for f in os.listdir(basedir):
            filename = os.path.join(basedir, f)
            if os.path.isfile(filename):
                # close each file explicitly instead of leaving the handle
                # to the garbage collector
                info_file = open(filename, "r")
                try:
                    files[f] = info_file.read()
                finally:
                    info_file.close()
        return files

    def remote_getVersion(self):
        """Send our version back to the Master"""
        return buildbot.version
345
346
347
class BotFactory(ReconnectingPBClientFactory):
    """Reconnecting client factory that also runs the application-level
    keepalive and idle-detection timers for the connection to the master."""

    # 'keepaliveInterval' serves two purposes. The first is to keep the
    # connection alive: it guarantees that there will be at least some
    # traffic once every 'keepaliveInterval' seconds, which may help keep an
    # interposed NAT gateway from dropping the address mapping because it
    # thinks the connection has been abandoned. The second is to put an upper
    # limit on how long the buildmaster might have gone away before we notice
    # it. For this second purpose, we insist upon seeing *some* evidence of
    # the buildmaster at least once every 'keepaliveInterval' seconds.
    keepaliveInterval = None # None = do not use keepalives

    # 'keepaliveTimeout' seconds before the interval expires, we will send a
    # keepalive request, both to add some traffic to the connection, and to
    # prompt a response from the master in case all our builders are idle. We
    # don't insist upon receiving a timely response from this message: a slow
    # link might put the request at the wrong end of a large build message.
    keepaliveTimeout = 30 # how long we will go without a response

    # 'maxDelay' determines the maximum amount of time the slave will wait
    # between connection retries
    maxDelay = 300

    # DelayedCalls created by startTimers(); None while not scheduled
    keepaliveTimer = None
    activityTimer = None
    # timestamp (from buildbot.util.now) of the last sign of life
    lastActivity = 0
    unsafeTracebacks = 1
    # the perspective handed to gotPerspective(); None while disconnected
    perspective = None

    def __init__(self, keepaliveInterval, keepaliveTimeout, maxDelay):
        ReconnectingPBClientFactory.__init__(self)
        self.maxDelay = maxDelay
        self.keepaliveInterval = keepaliveInterval
        self.keepaliveTimeout = keepaliveTimeout

    def startedConnecting(self, connector):
        ReconnectingPBClientFactory.startedConnecting(self, connector)
        self.connector = connector

    def gotPerspective(self, perspective):
        """Called once logged in: enable keepalives and start the timers.

        TCP-level keepalive is requested on the transport. If that fails
        and no application-level interval was configured, a 10 minute
        interval is used as a fallback. The timers are only started when an
        interval is in effect.
        """
        ReconnectingPBClientFactory.gotPerspective(self, perspective)
        self.perspective = perspective
        try:
            perspective.broker.transport.setTcpKeepAlive(1)
        except Exception:
            log.msg("unable to set SO_KEEPALIVE")
            if not self.keepaliveInterval:
                self.keepaliveInterval = 10*60
        self.activity()
        if self.keepaliveInterval:
            log.msg("sending application-level keepalives every %d seconds" \
                    % self.keepaliveInterval)
            self.startTimers()

    def clientConnectionFailed(self, connector, reason):
        self.connector = None
        ReconnectingPBClientFactory.clientConnectionFailed(self,
                                                           connector, reason)

    def clientConnectionLost(self, connector, reason):
        self.connector = None
        self.stopTimers()
        self.perspective = None
        ReconnectingPBClientFactory.clientConnectionLost(self,
                                                         connector, reason)

    def startTimers(self):
        """Schedule the next keepalive and the next activity check."""
        assert self.keepaliveInterval
        assert not self.keepaliveTimer
        assert not self.activityTimer
        # Insist that doKeepalive fires before checkActivity. Really, it
        # needs to happen at least one RTT beforehand.
        assert self.keepaliveInterval > self.keepaliveTimeout

        # arrange to send a keepalive a little while before our deadline
        when = self.keepaliveInterval - self.keepaliveTimeout
        self.keepaliveTimer = reactor.callLater(when, self.doKeepalive)
        # and check for activity too
        self.activityTimer = reactor.callLater(self.keepaliveInterval,
                                               self.checkActivity)

    def stopTimers(self):
        """Cancel whichever of the two timers are currently scheduled."""
        if self.keepaliveTimer:
            self.keepaliveTimer.cancel()
            self.keepaliveTimer = None
        if self.activityTimer:
            self.activityTimer.cancel()
            self.activityTimer = None

    def activity(self, res=None):
        """Record that the master was heard from just now. 'res' is ignored;
        it exists so this method can be used as a Deferred callback."""
        self.lastActivity = now()

    def doKeepalive(self):
        # send the keepalive request. If it fails outright, the connection
        # was already dropped, so just log and ignore.
        self.keepaliveTimer = None
        log.msg("sending app-level keepalive")
        d = self.perspective.callRemote("keepalive")
        d.addCallback(self.activity)
        d.addErrback(self.keepaliveLost)

    def keepaliveLost(self, f):
        log.msg("BotFactory.keepaliveLost")

    def checkActivity(self):
        """Drop the connection if nothing was heard from the master for a
        whole keepaliveInterval; otherwise re-arm both timers."""
        self.activityTimer = None
        # read the clock once so the test and the logged duration agree
        current = now()
        if self.lastActivity + self.keepaliveInterval < current:
            log.msg("BotFactory.checkActivity: nothing from master for "
                    "%d secs" % (current - self.lastActivity))
            self.perspective.broker.transport.loseConnection()
            return
        self.startTimers()

    def stopFactory(self):
        ReconnectingPBClientFactory.stopFactory(self)
        self.stopTimers()
463
464
class BuildSlave(service.MultiService):
    """Top-level slave service: owns the Bot, the BotFactory used to log in
    to the buildmaster, and the TCP client connection."""

    # class used to create the bot child service; override to substitute
    # a different implementation
    botClass = Bot

    # debugOpts is a dictionary used during unit tests.

    # debugOpts['stallPings'] can be set to a tuple of (timeout, []). Any
    # calls to SlaveBuilder.remote_ping will stall for 'timeout' seconds
    # before returning. The DelayedCalls used to implement this are stashed
    # in the list so they can be cancelled later.

    # debugOpts['failPingOnce'] can be set to True to make the slaveping fail
    # exactly once.

    def __init__(self, buildmaster_host, port, name, passwd, basedir,
                 keepalive, usePTY, keepaliveTimeout=30, umask=None,
                 maxdelay=300, debugOpts=None):
        """Build the service tree and prepare the connection to the master.

        keepalive == 0 is treated as "no interval configured" and passed to
        BotFactory as None. umask, when not None, is applied in
        startService(). debugOpts is copied, so later changes to the
        caller's dictionary are not seen here; None means no options.
        """
        log.msg("Creating BuildSlave -- buildbot.version: %s" % buildbot.version )
        service.MultiService.__init__(self)
        if debugOpts is None:
            debugOpts = {}
        self.debugOpts = debugOpts.copy()
        bot = self.botClass(basedir, usePTY)
        bot.setServiceParent(self)
        self.bot = bot
        if keepalive == 0:
            keepalive = None
        self.umask = umask
        bf = self.bf = BotFactory(keepalive, keepaliveTimeout, maxdelay)
        bf.startLogin(credentials.UsernamePassword(name, passwd), client=bot)
        self.connection = c = internet.TCPClient(buildmaster_host, port, bf)
        c.setServiceParent(self)

    def waitUntilDisconnected(self):
        # utility method for testing. Returns a Deferred that will fire when
        # we lose the connection to the master.
        if not self.bf.perspective:
            return defer.succeed(None)
        d = defer.Deferred()
        self.bf.perspective.notifyOnDisconnect(lambda res: d.callback(None))
        return d

    def startService(self):
        """Apply the configured umask, if any, then start child services."""
        if self.umask is not None:
            os.umask(self.umask)
        service.MultiService.startService(self)

    def stopService(self):
        """Stop reconnecting, stop child services, and drop the connection."""
        self.bf.continueTrying = 0
        self.bf.stopTrying()
        service.MultiService.stopService(self)
        # now kill the TCP connection
        # twisted >2.0.1 does this for us, and leaves _connection=None
        if self.connection._connection:
            self.connection._connection.disconnect()
OLDNEW
« no previous file with comments | « third_party/buildbot_7_12/buildbot/slave/__init__.py ('k') | third_party/buildbot_7_12/buildbot/slave/commands.py » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698