rev |
line source |
pascal@20689
|
1 --- imapbox/mailboxresource.py
|
pascal@20689
|
2 +++ imapbox/mailboxresource.py
|
pascal@20689
|
3 @@ -9,6 +9,8 @@
|
pascal@20689
|
4 import hashlib
|
pascal@20689
|
5 from message import Message
|
pascal@20689
|
6 import datetime
|
pascal@20689
|
7 +import time
|
pascal@20689
|
8 +import rfc822
|
pascal@20689
|
9
|
pascal@20689
|
10
|
pascal@20689
|
11
|
pascal@20689
|
12 @@ -16,10 +18,22 @@
|
pascal@20689
|
13 """Operations on a mailbox"""
|
pascal@20689
|
14
|
pascal@20689
|
15 def __init__(self, host, port, username, password, remote_folder):
|
pascal@20689
|
16 + if port == '143':
|
pascal@20689
|
17 + self.mailbox = imaplib.IMAP4(host, port)
|
pascal@20689
|
18 + elif port == '993':
|
pascal@20689
|
19 self.mailbox = imaplib.IMAP4_SSL(host, port)
|
pascal@20689
|
20 self.mailbox.login(username, password)
|
pascal@20689
|
21 self.mailbox.select(remote_folder, readonly=True)
|
pascal@20689
|
22
|
pascal@20689
|
23 + if remote_folder=='INBOX':
|
pascal@20689
|
24 + self.remote_folder=''
|
pascal@20689
|
25 + else
|
pascal@20689
|
26 + r_f=re.sub('INBOX','',remote_folder)
|
pascal@20689
|
27 + if re.findall('^\.',r_f):
|
pascal@20689
|
28 + self.remote_folder=re.sub('\.','/',rf)
|
pascal@20689
|
29 + else:
|
pascal@20689
|
30 + self.remote_folder='/'+r_f
|
pascal@20689
|
31 +
|
pascal@20689
|
32 def copy_emails(self, days, local_folder, wkhtmltopdf):
|
pascal@20689
|
33
|
pascal@20689
|
34 n_saved = 0
|
pascal@20689
|
35 @@ -50,9 +64,19 @@
|
pascal@20689
|
36
|
pascal@20689
|
37
|
pascal@20689
|
38 def getEmailFolder(self, msg, data):
|
pascal@20689
|
39 - if msg['Message-Id']:
|
pascal@20689
|
40 - foldername = re.sub('[^a-zA-Z0-9_\-\.()\s]+', '', msg['Message-Id'])
|
pascal@20689
|
41 + if msg['To'] and re.findall('Sent',self.remote_folder):
|
pascal@20689
|
42 + foldername = re.findall("<.*>", msg['To'])
|
pascal@20689
|
43 + if foldername:
|
pascal@20689
|
44 + foldername = re.sub('[\<\>]+', '', foldername[0])
|
pascal@20689
|
45 else:
|
pascal@20689
|
46 + foldername=msg['To']
|
pascal@20689
|
47 + elif msg['From']:
|
pascal@20689
|
48 + foldername = re.findall("<.*>", msg['From'])
|
pascal@20689
|
49 + if foldername:
|
pascal@20689
|
50 + foldername = re.sub('[\<\>]+', '', foldername[0])
|
pascal@20689
|
51 + else:
|
pascal@20689
|
52 + foldername=msg['From']
|
pascal@20689
|
53 + else:
|
pascal@20689
|
54 foldername = hashlib.sha224(data).hexdigest()
|
pascal@20689
|
55
|
pascal@20689
|
56 year = 'None'
|
pascal@20689
|
57 @@ -62,20 +86,33 @@
|
pascal@20689
|
58 year = match.group(1)
|
pascal@20689
|
59
|
pascal@20689
|
60
|
pascal@20689
|
61 - return os.path.join(self.local_folder, year, foldername)
|
pascal@20689
|
62 + return os.path.join(self.local_folder+self.remote_folder, self.normalizeDate(msg['Date'])+'_'+foldername)
|
pascal@20689
|
63
|
pascal@20689
|
64 + def normalizeDate(self, datestr):
|
pascal@20689
|
65 + t = email.utils.parsedate_tz(datestr)
|
pascal@20689
|
66 + timeval = time.mktime(t[:-1])
|
pascal@20689
|
67 + date = email.utils.formatdate(timeval, True)
|
pascal@20689
|
68 + utc = time.gmtime(email.utils.mktime_tz(t))
|
pascal@20689
|
69 +# rfc2822 = '{} {:+03d}00'.format(date[:-6], t[9]//3600)
|
pascal@20689
|
70 + iso8601 = time.strftime('%Y%m%dT%H%M%SZ', utc)
|
pascal@20689
|
71
|
pascal@20689
|
72 + return (iso8601)
|
pascal@20689
|
73
|
pascal@20689
|
74 +
|
pascal@20689
|
75 def saveEmail(self, data):
|
pascal@20689
|
76 for response_part in data:
|
pascal@20689
|
77 if isinstance(response_part, tuple):
|
pascal@20689
|
78 - msg = email.message_from_string(response_part[1].decode("utf-8"))
|
pascal@20689
|
79 + try:
|
pascal@20689
|
80 + msg = email.message_from_string(re.sub('^>', '', response_part[1]).decode("utf-8")) # supprime '>' dans l'email
|
pascal@20689
|
81 + except UnicodeError:
|
pascal@20689
|
82 + msg = email.message_from_string(re.sub('^>', '', response_part[1]).decode('latin1').encode('utf-8')) # supprime '>' dans l'email
|
pascal@20689
|
83 directory = self.getEmailFolder(msg, data[0][1])
|
pascal@20689
|
84
|
pascal@20689
|
85 if os.path.exists(directory):
|
pascal@20689
|
86 return False
|
pascal@20689
|
87
|
pascal@20689
|
88 os.makedirs(directory)
|
pascal@20689
|
89 + os.utime(directory,(time.mktime(rfc822.parsedate(msg['Date'])), time.mktime(rfc822.parsedate(msg['Date']))))
|
pascal@20689
|
90
|
pascal@20689
|
91 try:
|
pascal@20689
|
92 message = Message(directory, msg)
|
pascal@20689
|
93 @@ -86,9 +123,12 @@
|
pascal@20689
|
94 if self.wkhtmltopdf:
|
pascal@20689
|
95 message.createPdfFile(self.wkhtmltopdf)
|
pascal@20689
|
96
|
pascal@20689
|
97 + os.utime(directory,(time.mktime(rfc822.parsedate(msg['Date'])), time.mktime(rfc822.parsedate(msg['Date']))))
|
pascal@20689
|
98 +
|
pascal@20689
|
99 except Exception as e:
|
pascal@20689
|
100 # ex: Unsupported charset on decode
|
pascal@20689
|
101 print(directory)
|
pascal@20689
|
102 + os.utime(directory,(time.mktime(rfc822.parsedate(msg['Date'])), time.mktime(rfc822.parsedate(msg['Date']))))
|
pascal@20689
|
103 if hasattr(e, 'strerror'):
|
pascal@20689
|
104 print("MailboxClient.saveEmail() failed:", e.strerror)
|
pascal@20689
|
105 else:
|
pascal@20689
|
106 --- imapbox/message.py
|
pascal@20689
|
107 +++ imapbox/message.py
|
pascal@20689
|
108 @@ -15,6 +15,8 @@
|
pascal@20689
|
109 import cgi
|
pascal@20689
|
110 import time
|
pascal@20689
|
111 import pkgutil
|
pascal@20689
|
112 +import rfc822
|
pascal@20689
|
113 +import sys
|
pascal@20689
|
114
|
pascal@20689
|
115 from six.moves import html_parser
|
pascal@20689
|
116
|
pascal@20689
|
117 @@ -82,14 +84,15 @@
|
pascal@20689
|
118 except email.Errors.HeaderParseError:
|
pascal@20689
|
119 # This already append in email.base64mime.decode()
|
pascal@20689
|
120 # instead return a sanitized ascii string
|
pascal@20689
|
121 - return header_text.encode('ascii', 'replace').decode('ascii')
|
pascal@20689
|
122 + return header_text.encode('ascii', 'replace').decode(default)
|
pascal@20689
|
123 else:
|
pascal@20689
|
124 for i, (text, charset) in enumerate(headers):
|
pascal@20689
|
125 headers[i]=text
|
pascal@20689
|
126 if charset:
|
pascal@20689
|
127 - headers[i]=str(text, charset)
|
pascal@20689
|
128 + text = unicode(text,charset)
|
pascal@20689
|
129 + headers[i]=text
|
pascal@20689
|
130 else:
|
pascal@20689
|
131 - headers[i]=str(text)
|
pascal@20689
|
132 + headers[i]=text.decode('utf-8')
|
pascal@20689
|
133 return u"".join(headers)
|
pascal@20689
|
134
|
pascal@20689
|
135
|
pascal@20689
|
136 @@ -102,21 +105,27 @@
|
pascal@20689
|
137 # use the same for both and see later
|
pascal@20689
|
138 name=addr
|
pascal@20689
|
139
|
pascal@20689
|
140 + headers=decode_header(name)
|
pascal@20689
|
141 try:
|
pascal@20689
|
142 - # address must be ascii only
|
pascal@20689
|
143 - addr=addr.encode('ascii')
|
pascal@20689
|
144 + addr = addr.encode("ascii")
|
pascal@20689
|
145 except UnicodeError:
|
pascal@20689
|
146 - addr=''
|
pascal@20689
|
147 + addr = ''
|
pascal@20689
|
148 else:
|
pascal@20689
|
149 # address must match adress regex
|
pascal@20689
|
150 if not email_address_re.match(addr.decode("utf-8")):
|
pascal@20689
|
151 addr=''
|
pascal@20689
|
152 - addrs[i]=(self.getmailheader(name), addr.decode("utf-8"))
|
pascal@20689
|
153 + addrs[i]=(self.getmailheader(name), "utf-8")
|
pascal@20689
|
154 return addrs
|
pascal@20689
|
155
|
pascal@20689
|
156 def getSubject(self):
|
pascal@20689
|
157 - if not hasattr(self, 'subject'):
|
pascal@20689
|
158 - self.subject = self.getmailheader(self.msg.get('Subject', ''))
|
pascal@20689
|
159 +# if not hasattr(self, 'subject'):
|
pascal@20689
|
160 +# self.subject = self.getmailheader(self.msg.get('Subject'),'')
|
pascal@20689
|
161 + headers=decode_header(self.msg.get('Subject'))
|
pascal@20689
|
162 + for i, (text, charset) in enumerate(headers):
|
pascal@20689
|
163 + if charset:
|
pascal@20689
|
164 + self.subject = unicode(text,charset)
|
pascal@20689
|
165 + else:
|
pascal@20689
|
166 + self.subject=text.decode('utf-8')
|
pascal@20689
|
167 return self.subject
|
pascal@20689
|
168
|
pascal@20689
|
169 def getFrom(self):
|
pascal@20689
|
170 @@ -172,6 +181,7 @@
|
pascal@20689
|
171 json_file.write(data)
|
pascal@20689
|
172
|
pascal@20689
|
173 json_file.close()
|
pascal@20689
|
174 + os.utime('%s/metadata.json' %(self.directory),(time.mktime(rfc822.parsedate(self.msg['Date'])), time.mktime(rfc822.parsedate(self.msg['Date']))))
|
pascal@20689
|
175
|
pascal@20689
|
176
|
pascal@20689
|
177
|
pascal@20689
|
178 @@ -180,6 +190,7 @@
|
pascal@20689
|
179 f = gzip.open('%s/raw.eml.gz' %(self.directory), 'wb')
|
pascal@20689
|
180 f.write(data)
|
pascal@20689
|
181 f.close()
|
pascal@20689
|
182 + os.utime('%s/raw.eml.gz' %(self.directory),(time.mktime(rfc822.parsedate(self.msg['Date'])), time.mktime(rfc822.parsedate(self.msg['Date']))))
|
pascal@20689
|
183
|
pascal@20689
|
184
|
pascal@20689
|
185 def getPartCharset(self, part):
|
pascal@20689
|
186 @@ -192,14 +203,23 @@
|
pascal@20689
|
187 if not hasattr(self, 'text_content'):
|
pascal@20689
|
188 self.text_content = ''
|
pascal@20689
|
189 for part in parts:
|
pascal@20689
|
190 + raw_content_0 = part.get_payload()
|
pascal@20689
|
191 raw_content = part.get_payload(decode=True)
|
pascal@20689
|
192 charset = self.getPartCharset(part)
|
pascal@20689
|
193 self.text_content += raw_content.decode(charset, "replace")
|
pascal@20689
|
194 + if charset!='utf-8' and self.text_content:
|
pascal@20689
|
195 + try:
|
pascal@20689
|
196 + self.text_content=self.text_content.encode('raw_unicode_escape').decode('utf-8') # Double-decoding unicode
|
pascal@20689
|
197 + except UnicodeError:
|
pascal@20689
|
198 + self.text_content=raw_content_0.decode('utf_8')
|
pascal@20689
|
199 return self.text_content
|
pascal@20689
|
200
|
pascal@20689
|
201
|
pascal@20689
|
202 def createTextFile(self, parts):
|
pascal@20689
|
203 - utf8_content = self.getTextContent(parts)
|
pascal@20689
|
204 + intro=''
|
pascal@20689
|
205 + if self.getSubject():
|
pascal@20689
|
206 + intro='==> ' + self.getSubject() + '\r\n\r\n'
|
pascal@20689
|
207 + utf8_content = intro + self.getTextContent(parts)
|
pascal@20689
|
208 with open(os.path.join(self.directory, 'message.txt'), 'wb') as fp:
|
pascal@20689
|
209 fp.write(bytearray(utf8_content, 'utf-8'))
|
pascal@20689
|
210
|
pascal@20689
|
211 @@ -208,9 +228,15 @@
|
pascal@20689
|
212 self.html_content = ''
|
pascal@20689
|
213
|
pascal@20689
|
214 for part in parts:
|
pascal@20689
|
215 + raw_content_0 = part.get_payload()
|
pascal@20689
|
216 raw_content = part.get_payload(decode=True)
|
pascal@20689
|
217 charset = self.getPartCharset(part)
|
pascal@20689
|
218 self.html_content += raw_content.decode(charset, "replace")
|
pascal@20689
|
219 + if charset!='utf-8' and self.html_content:
|
pascal@20689
|
220 + try:
|
pascal@20689
|
221 + self.html_content.encode('raw_unicode_escape').decode('utf-8') # Double-decoding unicode
|
pascal@20689
|
222 + except UnicodeError:
|
pascal@20689
|
223 + self.html_content=raw_content_0.decode('utf_8')
|
pascal@20689
|
224
|
pascal@20689
|
225 m = re.search('<body[^>]*>(.+)<\/body>', self.html_content, re.S | re.I)
|
pascal@20689
|
226 if (m != None):
|
pascal@20689
|
227 @@ -304,9 +330,11 @@
|
pascal@20689
|
228
|
pascal@20689
|
229 if message_parts['text']:
|
pascal@20689
|
230 self.createTextFile(message_parts['text'])
|
pascal@20689
|
231 + os.utime('%s/message.txt' %(self.directory),(time.mktime(rfc822.parsedate(self.msg['Date'])), time.mktime(rfc822.parsedate(self.msg['Date']))))
|
pascal@20689
|
232
|
pascal@20689
|
233 if message_parts['html']:
|
pascal@20689
|
234 self.createHtmlFile(message_parts['html'], message_parts['embed_images'])
|
pascal@20689
|
235 + os.utime('%s/message.html' %(self.directory),(time.mktime(rfc822.parsedate(self.msg['Date'])), time.mktime(rfc822.parsedate(self.msg['Date']))))
|
pascal@20689
|
236
|
pascal@20689
|
237 if message_parts['files']:
|
pascal@20689
|
238 attdir = os.path.join(self.directory, 'attachments')
|
pascal@20689
|
239 @@ -317,6 +345,8 @@
|
pascal@20689
|
240 payload = afile[0].get_payload(decode=True)
|
pascal@20689
|
241 if payload:
|
pascal@20689
|
242 fp.write(payload)
|
pascal@20689
|
243 + os.utime('%s/attachments/%s' %(self.directory,afile[1]),(time.mktime(rfc822.parsedate(self.msg['Date'])), time.mktime(rfc822.parsedate(self.msg['Date']))))
|
pascal@20689
|
244 + os.utime('%s/attachments' %(self.directory),(time.mktime(rfc822.parsedate(self.msg['Date'])), time.mktime(rfc822.parsedate(self.msg['Date']))))
|
pascal@20689
|
245
|
pascal@20689
|
246
|
pascal@20689
|
247 def createPdfFile(self, wkhtmltopdf):
|
pascal@20689
|
248 @@ -325,5 +355,6 @@
|
pascal@20689
|
249 pdf_path = os.path.join(self.directory, 'message.pdf')
|
pascal@20689
|
250 config = pdfkit.configuration(wkhtmltopdf=wkhtmltopdf)
|
pascal@20689
|
251 pdfkit.from_file(html_path, pdf_path, configuration=config)
|
pascal@20689
|
252 + os.utime('%s/message.pdf' %(self.directory),(time.mktime(rfc822.parsedate(self.msg['Date'])), time.mktime(rfc822.parsedate(self.msg['Date']))))
|
pascal@20689
|
253 else:
|
pascal@20689
|
254 print("Couldn't create PDF message, since \"pdfkit\" module isn't installed.")
|