OLD | NEW |
| (Empty) |
1 # -*- test-case-name: buildbot.test.test_mailparse -*- | |
2 | |
3 """ | |
4 Parse various kinds of 'CVS notify' email. | |
5 """ | |
6 import os, re | |
7 import time, calendar | |
8 from email import message_from_file | |
9 from email.Utils import parseaddr | |
10 from email.Iterators import body_line_iterator | |
11 | |
12 from zope.interface import implements | |
13 from twisted.python import log | |
14 from buildbot import util | |
15 from buildbot.interfaces import IChangeSource | |
16 from buildbot.changes import changes | |
17 from buildbot.changes.maildir import MaildirService | |
18 | |
class MaildirSource(MaildirService, util.ComparableMixin):
    """This source will watch a maildir that is subscribed to a FreshCVS
    change-announcement mailing list.

    Subclasses set ``name`` and provide ``parse(m, prefix)``, which turns a
    parsed email message into a Change, or returns None to ignore it.
    """
    implements(IChangeSource)

    compare_attrs = ["basedir", "pollinterval", "prefix"]
    name = None

    def __init__(self, maildir, prefix=None):
        """Watch `maildir`; `prefix` is handed to parse() for each message."""
        MaildirService.__init__(self, maildir)
        self.prefix = prefix
        if prefix and not prefix.endswith("/"):
            # the format string used to be logged without its arguments
            log.msg("%s: you probably want your prefix=('%s') to end with "
                    "a slash" % (self.name, prefix))

    def describe(self):
        """Return a one-line human-readable description of this source."""
        return "%s mailing list in maildir %s" % (self.name, self.basedir)

    def messageReceived(self, filename):
        """Parse new/<filename>, submit any resulting Change to the parent,
        then move the message to cur/ whether or not it produced a Change.
        """
        path = os.path.join(self.basedir, "new", filename)
        fd = open(path, "r")
        try:
            change = self.parse_file(fd, self.prefix)
        finally:
            # close explicitly instead of leaking the descriptor until GC
            fd.close()
        if change:
            self.parent.addChange(change)
        os.rename(path, os.path.join(self.basedir, "cur", filename))

    def parse_file(self, fd, prefix=None):
        """Read an email message from the open file `fd` and parse() it."""
        m = message_from_file(fd)
        return self.parse(m, prefix)
49 | |
class FCMaildirSource(MaildirSource):
    name = "FreshCVS"

    def parse(self, m, prefix=None):
        """Parse mail sent by FreshCVS.

        Returns a Change, or None when the From: header does not look like
        FreshCVS or when no listed file survives the prefix filter.
        """

        # FreshCVS sets From: to "user CVS <user>", but the <> part may be
        # modified by the MTA (to include a local domain), so only the
        # display name is used.
        realname, _address = parseaddr(m["from"])
        if not realname:
            return None  # no From means this message isn't from FreshCVS
        marker = realname.find(" CVS")
        if marker == -1:
            return None  # this message isn't from FreshCVS
        who = realname[:marker]

        # we take the time of receipt as the time of checkin. Not correct,
        # but it avoids the out-of-order-changes issue.
        when = util.now()

        # A single iterator is shared by all the loops below, so each
        # section resumes where the previous one stopped.
        body = iter(list(body_line_iterator(m)))

        # skip ahead to the file list
        for line in body:
            if line == "Modified files:\n":
                break

        # the file list runs up to the first blank line
        files = []
        isdir = 0
        for line in body:
            if line == "\n":
                break
            fields = line.rstrip("\n").split(None, 1)
            path = fields[0]
            if prefix:
                # insist that the file start with the prefix: FreshCVS sends
                # changes we don't care about too
                if not path.startswith(prefix):
                    continue
                path = path[len(prefix):]
            # a bare name, or a name followed by "0 0", marks a directory
            if len(fields) == 1 or fields[1] == "0 0":
                isdir = 1
            files.append(path)

        # skip ahead to the log message
        for line in body:
            if line == "Log message:\n":
                break

        # message is terminated by "ViewCVS links:" or "Index:..." (patch)
        comment_lines = []
        for line in body:
            if line == "ViewCVS links:\n" or line.startswith("Index: "):
                break
            comment_lines.append(line)
        comments = "".join(comment_lines).rstrip() + "\n"

        if not files:
            return None

        return changes.Change(who, files, comments, isdir, when=when)
118 | |
class SyncmailMaildirSource(MaildirSource):
    name = "Syncmail"

    def parse(self, m, prefix=None):
        """Parse messages sent by the 'syncmail' program, as suggested by the
        sourceforge.net CVS Admin documentation. Syncmail is maintained at
        syncmail.sf.net .

        `m` is the parsed email message. `prefix`, when given, must start
        every file name; it is stripped from the reported name and files
        lacking it are skipped.

        Returns a Change, or None when the message has no usable From
        address, no Subject, or lists no matching files.
        """
        # pretty much the same as freshcvs mail, not surprising since CVS is
        # the one creating most of the text

        # The mail is sent from the person doing the checkin. Assume that the
        # local username is enough to identify them (this assumes a one-server
        # cvs-over-rsh environment rather than the server-dirs-shared-over-NFS
        # model)
        name, addr = parseaddr(m["from"] or "")
        if not addr:
            return None  # no From address: not a syncmail message
        at = addr.find("@")
        if at == -1:
            who = addr  # might still be useful
        else:
            who = addr[:at]

        # we take the time of receipt as the time of checkin. Not correct (it
        # depends upon the email latency), but it avoids the
        # out-of-order-changes issue. Also syncmail doesn't give us anything
        # better to work with, unless you count pulling the v1-vs-v2
        # timestamp out of the diffs, which would be ugly. TODO: Pulling the
        # 'Date:' header from the mail is a possibility, and
        # email.Utils.parsedate_tz may be useful. It should be configurable,
        # however, because there are a lot of broken clocks out there.
        when = util.now()

        subject = m["subject"]
        if subject is None:
            # the directory name only appears in the subject, so without one
            # the file paths cannot be reconstructed
            return None
        # syncmail puts the repository-relative directory in the subject:
        # mprefix + "%(dir)s %(file)s,%(oldversion)s,%(newversion)s", where
        # 'mprefix' is something that could be added by a mailing list
        # manager.
        # this is the only reasonable way to determine the directory name
        space = subject.find(" ")
        if space != -1:
            directory = subject[:space]
        else:
            directory = subject

        files = []
        comments = ""
        isdir = 0
        branch = None

        lines = list(body_line_iterator(m))

        # skip ahead to the file list
        while lines:
            line = lines.pop(0)
            if (line == "Modified Files:\n" or
                line == "Added Files:\n" or
                line == "Removed Files:\n"):
                break

        while lines:
            line = lines.pop(0)
            if line == "\n":
                break
            if line == "Log Message:\n":
                # push it back so the log-message scan below still finds it
                lines.insert(0, line)
                break
            line = line.strip()
            # note: syncmail will send one email per directory involved in a
            # commit, with multiple files if they were in the same directory.
            # Unlike freshCVS, it makes no attempt to collect all related
            # commits into a single message.

            # note: syncmail will report a Tag underneath the ... Files: line
            # e.g.: Tag: BRANCH-DEVEL
            if line.startswith('Tag:'):
                branch = line.split(' ')[-1].rstrip()
                continue

            for f in line.split(" "):
                if not f:
                    # consecutive spaces yield empty tokens, which would
                    # otherwise be reported as a bogus "<directory>/" file
                    continue
                f = directory + "/" + f
                if prefix:
                    # insist that the file start with the prefix: we may get
                    # changes we don't care about too
                    if not f.startswith(prefix):
                        continue
                    f = f[len(prefix):]
                # TODO: figure out how new directories are described, set
                # .isdir
                files.append(f)

        if not files:
            return None

        # skip ahead to the log message
        while lines:
            line = lines.pop(0)
            if line == "Log Message:\n":
                break
        # message is terminated by "Index:..." (patch) or "--- NEW FILE.."
        # or "--- filename DELETED ---". Sigh.
        while lines:
            line = lines.pop(0)
            if line.find("Index: ") == 0:
                break
            if re.search(r"^--- NEW FILE", line):
                break
            if re.search(r" DELETED ---$", line):
                break
            comments += line
        comments = comments.rstrip() + "\n"

        return changes.Change(who, files, comments, isdir, when=when,
                              branch=branch)
239 | |
240 # Bonsai mail parser by Stephen Davis. | |
241 # | |
242 # This handles changes for CVS repositories that are watched by Bonsai | |
243 # (http://www.mozilla.org/bonsai.html) | |
244 | |
245 # A Bonsai-formatted email message looks like: | |
246 # | |
247 # C|1071099907|stephend|/cvs|Sources/Scripts/buildbot|bonsai.py|1.2|||18|7 | |
248 # A|1071099907|stephend|/cvs|Sources/Scripts/buildbot|master.cfg|1.1|||18|7 | |
249 # R|1071099907|stephend|/cvs|Sources/Scripts/buildbot|BuildMaster.py||| | |
250 # LOGCOMMENT | |
251 # Updated bonsai parser and switched master config to buildbot-0.4.1 style. | |
252 # | |
253 # :ENDLOGCOMMENT | |
254 # | |
255 # In the first example line, stephend is the user, /cvs the repository, | |
256 # buildbot the directory, bonsai.py the file, 1.2 the revision, no sticky | |
257 # and branch, 18 lines added and 7 removed. All of these fields might not be | |
258 # present (during "removes" for example). | |
259 # | |
260 # There may be multiple "control" lines or even none (imports, directory | |
261 # additions) but there is one email per directory. We only care about actual | |
262 # changes since it is presumed directory additions don't actually affect the | |
263 # build. At least one file should need to change (the makefile, say) to | |
264 # actually make a new directory part of the build process. That's my story | |
265 # and I'm sticking to it. | |
266 | |
class BonsaiMaildirSource(MaildirSource):
    name = "Bonsai"

    def parse(self, m, prefix=None):
        """Parse mail sent by the Bonsai cvs loginfo script.

        The body is a run of '|'-separated control lines, then the log
        comment between LOGCOMMENT and :ENDLOGCOMMENT. `prefix` is accepted
        for interface compatibility but is not used here.

        Returns a Change, or None when a control line has fewer than six
        fields or when no control line names both a module and a file.
        """

        # we don't care who the email came from b/c the cvs user is in the
        # msg text

        who = "unknown"
        timestamp = None
        branch = None
        files = []
        lines = list(body_line_iterator(m))

        # read the control lines (what/who/where/file/etc.)
        while lines:
            line = lines.pop(0)
            if line == "LOGCOMMENT\n":
                break
            line = line.rstrip("\n")

            # the number of fields varies (removes carry fewer), so index
            # into the split result instead of tuple-unpacking it
            items = line.split('|')
            if len(items) < 6:
                # not a valid line, assume this isn't a bonsai message
                return None

            try:
                # just grab the bottom-most timestamp, they're probably all the
                # same. TODO: I'm assuming this is relative to the epoch, but
                # this needs testing.
                timestamp = int(items[1])
            except ValueError:
                pass

            user = items[2]
            if user:
                who = user

            module = items[4]
            filename = items[5]
            if module and filename:
                files.append("%s/%s" % (module, filename))

            # Field 8 is the branch (field 7, the sticky tag, is not used).
            # Only six fields are guaranteed by the check above, so a shorter
            # line must not be indexed past its end.
            if len(items) > 8:
                branch = items[8]

        # if no files changed, return nothing
        if not files:
            return None

        # read the comments
        comments = ""
        while lines:
            line = lines.pop(0)
            if line == ":ENDLOGCOMMENT\n":
                break
            comments += line
        comments = comments.rstrip() + "\n"

        # return buildbot Change object
        return changes.Change(who, files, comments, when=timestamp,
                              branch=branch)
332 | |
333 # svn "commit-email.pl" handler. The format is very similar to freshcvs mail; | |
334 # here's a sample: | |
335 | |
336 # From: username [at] apache.org [slightly obfuscated to avoid spam here] | |
337 # To: commits [at] spamassassin.apache.org | |
338 # Subject: svn commit: r105955 - in spamassassin/trunk: . lib/Mail | |
339 # ... | |
340 # | |
341 # Author: username | |
342 # Date: Sat Nov 20 00:17:49 2004 [note: TZ = local tz on server!] | |
343 # New Revision: 105955 | |
344 # | |
345 # Modified: [also Removed: and Added:] | |
346 # [filename] | |
347 # ... | |
348 # Log: | |
349 # [log message] | |
350 # ... | |
351 # | |
352 # | |
353 # Modified: spamassassin/trunk/lib/Mail/SpamAssassin.pm | |
354 # [unified diff] | |
355 # | |
356 # [end of mail] | |
357 | |
class SVNCommitEmailMaildirSource(MaildirSource):
    name = "SVN commit-email.pl"

    def parse(self, m, prefix=None):
        """Parse messages sent by the svn 'commit-email.pl' trigger.

        Returns a Change, or None when the message has no From address or
        when no listed file survives the prefix filter.
        """

        # Start from the local part of the sender's address; an "Author:"
        # line in the body overrides it below.
        _realname, addr = parseaddr(m["from"])
        if not addr:
            return None  # no From address: not a commit-email.pl message
        who = addr  # might still be useful without an "@"
        at = addr.find("@")
        if at != -1:
            who = addr[:at]

        # we take the time of receipt as the time of checkin. Not correct (it
        # depends upon the email latency), but it avoids the
        # out-of-order-changes issue. The "Date:" line in the body carries no
        # timezone (it is in the server's local TZ), so using it would need
        # a config setting for the source server's expected TZ.
        when = util.now()

        # A single iterator is shared by the three loops below, so each
        # stanza resumes where the previous one stopped.
        body = iter(list(body_line_iterator(m)))
        section_headers = ("Modified:\n", "Added:\n", "Removed:\n")

        # header stanza: runs up to and including the "Log:" line
        rev = None
        for line in body:
            # "Author: jmason"
            author = re.search(r"^Author: (\S+)", line)
            if author:
                who = author.group(1)

            # "New Revision: 105955"
            revision = re.search(r"^New Revision: (\d+)", line)
            if revision:
                rev = revision.group(1)

            if line == "Log:\n":
                break

        # commit message is terminated by the file-listing section
        comment_lines = []
        for line in body:
            if line in section_headers:
                break
            comment_lines.append(line)
        comments = "".join(comment_lines).rstrip() + "\n"

        # file listing: runs up to the first blank line
        files = []
        for line in body:
            if line == "\n":
                break
            if line.startswith(section_headers):
                continue  # ignore further section headers
            for f in line.strip().split(" "):
                if prefix:
                    # insist that the file start with the prefix: we may get
                    # changes we don't care about too
                    if not f.startswith(prefix):
                        log.msg("ignored file from svn commit: prefix '%s' "
                                "does not match filename '%s'" % (prefix, f))
                        continue
                    f = f[len(prefix):]

                # TODO: figure out how new directories are described, set
                # .isdir
                files.append(f)

        if not files:
            log.msg("no matching files found, ignoring commit")
            return None

        return changes.Change(who, files, comments, when=when, revision=rev)
459 | |
460 # bzr Launchpad branch subscription mails. Sample mail: | |
461 # | |
462 # From: noreply@launchpad.net | |
463 # Subject: [Branch ~knielsen/maria/tmp-buildbot-test] Rev 2701: test add file | |
464 # To: Joe <joe@acme.com> | |
465 # ... | |
466 # | |
467 # ------------------------------------------------------------ | |
468 # revno: 2701 | |
469 # committer: Joe <joe@acme.com> | |
470 # branch nick: tmpbb | |
471 # timestamp: Fri 2009-05-15 10:35:43 +0200 | |
472 # message: | |
473 # test add file | |
474 # added: | |
475 # test-add-file | |
476 # | |
477 # | |
478 # -- | |
479 # | |
480 # https://code.launchpad.net/~knielsen/maria/tmp-buildbot-test | |
481 # | |
482 # You are subscribed to branch lp:~knielsen/maria/tmp-buildbot-test. | |
483 # To unsubscribe from this branch go to https://code.launchpad.net/~knielsen/maria/tmp-buildbot-test/+edit-subscription. | |
484 # | |
485 # [end of mail] | |
486 | |
class BzrLaunchpadEmailMaildirSource(MaildirSource):
    name = "Launchpad"

    compare_attrs = MaildirSource.compare_attrs + ["branchMap", "defaultBranch"]

    def __init__(self, maildir, prefix=None, branchMap=None, defaultBranch=None,
                 **kwargs):
        """`branchMap` maps a repository name (with or without a leading
        'lp:') to the branch name to report; `defaultBranch` is used when
        the map has no entry for the repository.
        """
        self.branchMap = branchMap
        self.defaultBranch = defaultBranch
        MaildirSource.__init__(self, maildir, prefix, **kwargs)

    def parse(self, m, prefix=None):
        """Parse branch notification messages sent by Launchpad.

        Returns a Change when both a 'revno:' and a 'committer:' line were
        found in the body, otherwise None. `prefix` is not used here.
        """

        # a message without a Subject simply has no repository name
        subject = m["subject"] or ""
        match = re.search(r"^\s*\[Branch\s+([^]]+)\]", subject)
        if match:
            repository = match.group(1)
        else:
            repository = None

        # Put these into a dictionary, otherwise we cannot assign them
        # from nested function definitions.
        d = {'files': [], 'comments': ""}
        gobbler = None
        rev = None
        who = None
        when = util.now()

        # Each gobbler consumes one indented line of the section that is
        # currently open (message/removed/added/modified/renamed).
        def gobble_comment(s):
            d['comments'] += s + "\n"

        def gobble_removed(s):
            d['files'].append('%s REMOVED' % s)

        def gobble_added(s):
            d['files'].append('%s ADDED' % s)

        def gobble_modified(s):
            d['files'].append('%s MODIFIED' % s)

        def gobble_renamed(s):
            match = re.search(r"^(.+) => (.+)$", s)
            if match:
                d['files'].append('%s RENAMED %s' % (match.group(1),
                                                     match.group(2)))
            else:
                d['files'].append('%s RENAMED' % s)

        lines = list(body_line_iterator(m, True))
        while lines:
            line = lines.pop(0)

            # revno: 101
            match = re.search(r"^revno: ([0-9.]+)", line)
            if match:
                rev = match.group(1)

            # committer: Joe <joe@acme.com>
            match = re.search(r"^committer: (.*)$", line)
            if match:
                who = match.group(1)

            # timestamp: Fri 2009-05-15 10:35:43 +0200
            # datetime.strptime() is supposed to support %z for time zone, but
            # it does not seem to work. So handle the time zone manually.
            match = re.search(r"^timestamp: [a-zA-Z]{3} "
                              r"(\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}) "
                              r"([-+])(\d{2})(\d{2})$", line)
            if match:
                datestr = match.group(1)
                tz_sign = match.group(2)
                tz_hours = match.group(3)
                tz_minutes = match.group(4)
                when = parseLaunchpadDate(datestr, tz_sign, tz_hours,
                                          tz_minutes)

            if re.search(r"^message:\s*$", line):
                gobbler = gobble_comment
            elif re.search(r"^removed:\s*$", line):
                gobbler = gobble_removed
            elif re.search(r"^added:\s*$", line):
                gobbler = gobble_added
            elif re.search(r"^renamed:\s*$", line):
                gobbler = gobble_renamed
            elif re.search(r"^modified:\s*$", line):
                gobbler = gobble_modified
            # NOTE(review): the payload is sliced from column 2, so the
            # indent being matched is presumably two spaces -- confirm the
            # whitespace in this pattern against the pristine source.
            elif re.search(r"^ ", line) and gobbler:
                # Strip only a real trailing newline; a blind [:-1] would eat
                # the last character of an unterminated final line.
                gobbler(line[2:].rstrip("\n"))

        # Determine the name of the branch.
        branch = None
        if self.branchMap and repository:
            if repository in self.branchMap:
                branch = self.branchMap[repository]
            elif ('lp:' + repository) in self.branchMap:
                branch = self.branchMap['lp:' + repository]
        if not branch:
            if self.defaultBranch:
                branch = self.defaultBranch
            elif repository:
                branch = 'lp:' + repository
            else:
                branch = None

        if rev and who:
            return changes.Change(who, d['files'], d['comments'],
                                  when=when, revision=rev, branch=branch)
        else:
            return None
591 | |
def parseLaunchpadDate(datestr, tz_sign, tz_hours, tz_minutes):
    """Convert a Launchpad timestamp to seconds since the epoch (UTC).

    datestr is the local wall-clock time as "YYYY-MM-DD HH:MM:SS"; tz_sign
    ("+" or "-"), tz_hours and tz_minutes are the string pieces of the UTC
    offset that followed it, e.g. "+", "02", "00" for "+0200".
    """
    time_no_tz = calendar.timegm(time.strptime(datestr, "%Y-%m-%d %H:%M:%S"))
    # The sign applies to the whole offset. Applying it to the hours only
    # gets offsets such as -0330 wrong by twice the minutes.
    tz_delta = 60 * 60 * int(tz_hours) + 60 * int(tz_minutes)
    if tz_sign == "-":
        tz_delta = -tz_delta
    return time_no_tz - tz_delta
OLD | NEW |