| OLD | NEW | 
|---|
| (Empty) |  | 
|  | 1 # copyright 2003-2011 LOGILAB S.A. (Paris, FRANCE), all rights reserved. | 
|  | 2 # contact http://www.logilab.fr/ -- mailto:contact@logilab.fr | 
|  | 3 # | 
|  | 4 # This file is part of logilab-common. | 
|  | 5 # | 
|  | 6 # logilab-common is free software: you can redistribute it and/or modify it under | 
|  | 7 # the terms of the GNU Lesser General Public License as published by the Free | 
|  | 8 # Software Foundation, either version 2.1 of the License, or (at your option) any | 
|  | 9 # later version. | 
|  | 10 # | 
|  | 11 # logilab-common is distributed in the hope that it will be useful, but WITHOUT | 
|  | 12 # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS | 
|  | 13 # FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public License for more | 
|  | 14 # details. | 
|  | 15 # | 
|  | 16 # You should have received a copy of the GNU Lesser General Public License along | 
|  | 17 # with logilab-common.  If not, see <http://www.gnu.org/licenses/>. | 
|  | 18 """Unicode email support (extends email from stdlib). | 
|  | 19 | 
|  | 20 | 
|  | 21 | 
|  | 22 | 
|  | 23 """ | 
|  | 24 __docformat__ = "restructuredtext en" | 
|  | 25 | 
|  | 26 import email | 
|  | 27 from encodings import search_function | 
|  | 28 import sys | 
|  | 29 if sys.version_info >= (2, 5): | 
|  | 30     from email.utils import parseaddr, parsedate | 
|  | 31     from email.header import decode_header | 
|  | 32 else: | 
|  | 33     from email.Utils import parseaddr, parsedate | 
|  | 34     from email.Header import decode_header | 
|  | 35 | 
|  | 36 from datetime import datetime | 
|  | 37 | 
|  | 38 try: | 
|  | 39     from mx.DateTime import DateTime | 
|  | 40 except ImportError: | 
|  | 41     DateTime = datetime | 
|  | 42 | 
|  | 43 import logilab.common as lgc | 
|  | 44 | 
|  | 45 | 
def decode_QP(string):
    """Decode an RFC 2047 encoded header value into a unicode string.

    The value is split with ``decode_header``; every chunk is decoded with
    the charset it declares, ``iso-8859-15`` being used when a chunk
    declares none.

    A chunk declaring a charset unknown to Python is also decoded as
    ``iso-8859-15`` instead of letting the ``LookupError`` escape, in the
    same spirit as the unknown-charset fallback done for payloads.

    :param string: raw header value, as stored in the message
    :return: the decoded header as a single unicode string
    """
    fallback = 'iso-8859-15'
    text_type = type(u'')
    parts = []
    for decoded, charset in decode_header(string):
        if isinstance(decoded, text_type):
            # Python 3 hands back a header without any encoded word as text
            # already: nothing left to decode
            parts.append(decoded)
            continue
        try:
            parts.append(decoded.decode(charset or fallback, 'replace'))
        except LookupError:
            # charset name not known to the codec registry
            parts.append(decoded.decode(fallback, 'replace'))
    if sys.version_info < (3, 3):
        # older decode_header() strips the whitespace separating the chunks,
        # so it has to be put back when joining
        return u' '.join(parts)
    # from 3.3 on, decode_header() keeps that whitespace inside the chunks
    return u''.join(parts)
|  | 54 | 
def message_from_file(fd):
    """Parse the email readable from `fd` and wrap it into a `UMessage`.

    An empty string is returned when the stdlib parser raises
    `MessageParseError`.
    """
    try:
        parsed = email.message_from_file(fd)
    except email.Errors.MessageParseError:
        return ''
    return UMessage(parsed)
|  | 60 | 
def message_from_string(string):
    """Parse the email held in `string` and wrap it into a `UMessage`.

    An empty string is returned when the stdlib parser raises
    `MessageParseError`.
    """
    try:
        parsed = email.message_from_string(string)
    except email.Errors.MessageParseError:
        return ''
    return UMessage(parsed)
|  | 66 | 
class UMessage:
    """Encapsulates an email.Message instance and returns only unicode objects.

    Header values go through `decode_QP`; text payloads are decoded using
    the charset announced by the wrapped message.
    """

    def __init__(self, message):
        # the wrapped stdlib message, every accessor below delegates to it
        self.message = message

    # email.Message interface #################################################

    def get(self, header, default=None):
        """return the decoded value of `header`

        A falsy result (missing header with a falsy `default`, or an empty
        header value) is returned as is, without decoding.
        """
        value = self.message.get(header, default)
        if value:
            return decode_QP(value)
        return value

    def get_all(self, header, default=()):
        """return the list of decoded values of every `header` occurrence,
        `None` values being skipped
        """
        return [decode_QP(val) for val in self.message.get_all(header, default)
                if val is not None]

    def get_payload(self, index=None, decode=False):
        """return the payload of the wrapped message

        Without `index`: a list of `UMessage` when the payload is a list of
        sub-messages, the payload untouched when the main content type is
        not ``text``, else the payload decoded to unicode. With `index`:
        the matching sub-part wrapped into a `UMessage`.
        """
        message = self.message
        if index is None:
            payload = message.get_payload(index, decode)
            if isinstance(payload, list):
                return [UMessage(msg) for msg in payload]
            if message.get_content_maintype() != 'text':
                return payload

            charset = message.get_content_charset() or 'iso-8859-1'
            # unknown charset announced by the message: fall back to latin1
            if search_function(charset) is None:
                charset = 'iso-8859-1'
            return unicode(payload or '', charset, "replace")
        else:
            payload = UMessage(message.get_payload(index, decode))
        return payload

    def is_multipart(self):
        """delegate to the wrapped message's `is_multipart`"""
        return self.message.is_multipart()

    def get_boundary(self):
        """delegate to the wrapped message's `get_boundary`"""
        return self.message.get_boundary()

    def walk(self):
        """yield every part given by the wrapped message's `walk`, each one
        wrapped into a `UMessage`
        """
        for part in self.message.walk():
            yield UMessage(part)

    def get_content_maintype(self):
        """return the main content type as an unicode string"""
        return unicode(self.message.get_content_maintype())

    def get_content_type(self):
        """return the content type as an unicode string"""
        return unicode(self.message.get_content_type())

    def get_filename(self, failobj=None):
        """return the filename as an unicode string, `failobj` when the
        wrapped message gives it back, or a placeholder text when the
        filename can't be converted to unicode
        """
        value = self.message.get_filename(failobj)
        if value is failobj:
            return value
        try:
            return unicode(value)
        except UnicodeDecodeError:
            return u'error decoding filename'

    # other convenience methods ###############################################

    def headers(self):
        """return an unicode string containing all the message's headers"""
        values = []
        # iterate over items() rather than keys() + get(): get() only ever
        # gives the first occurrence of a header, so repeated ones
        # (Received...) would all be rendered with the first one's value
        for header, value in self.message.items():
            if value:
                value = decode_QP(value)
            values.append(u'%s: %s' % (header, value))
        return '\n'.join(values)

    def multi_addrs(self, header):
        """return a list of 2-uple (name, address) for the given address (which
        is expected to be an header containing address such as from, to, cc...)
        """
        # NOTE(review): parseaddr() is applied once per header occurrence, so
        # a single header line listing several addresses presumably only
        # contributes its first one -- confirm whether getaddresses() is
        # wanted here
        persons = []
        for person in self.get_all(header, ()):
            name, mail = parseaddr(person)
            persons.append((name, mail))
        return persons

    def date(self, alternative_source=False, return_str=False):
        """return a datetime object for the email's date or None if no date is
        set or if it can't be parsed

        :param alternative_source: when true and there is no ``Date``
          header, use what follows the second space of the unix "From "
          line, if any
        :param return_str: when true, return the raw date string instead of
          None if it can't be turned into a date object
        """
        value = self.get('date')
        if value is None and alternative_source:
            unix_from = self.message.get_unixfrom()
            if unix_from is not None:
                try:
                    value = unix_from.split(" ", 2)[2]
                except IndexError:
                    pass
        if value is not None:
            datetuple = parsedate(value)
            if datetuple:
                try:
                    if lgc.USE_MX_DATETIME:
                        return DateTime(*datetuple[:6])
                    return datetime(*datetuple[:6])
                except ValueError:
                    # parsedate() may hand back fields the date type rejects
                    # (e.g. a 31st of February): handle it as unparseable
                    datetuple = None
            if not datetuple and not return_str:
                return None
        return value
| OLD | NEW | 
|---|