wok-current annotate imapbox/stuff/slitaz.patch @ rev 23840

Up libsdl2-image (2.0.5), radare2 (4.4.0), screen (4.8.0), sleuthkit (4.9.0), thunderbird-bin (68.9.0)
author Pascal Bellard <pascal.bellard@slitaz.org>
date Thu Jun 04 16:22:12 2020 +0000 (2020-06-04)
parents
children
rev   line source
pascal@20689 1 --- imapbox/mailboxresource.py
pascal@20689 2 +++ imapbox/mailboxresource.py
pascal@20689 3 @@ -9,6 +9,8 @@
pascal@20689 4 import hashlib
pascal@20689 5 from message import Message
pascal@20689 6 import datetime
pascal@20689 7 +import time
pascal@20689 8 +import rfc822
pascal@20689 9
pascal@20689 10
pascal@20689 11
pascal@20689 12 @@ -16,10 +18,22 @@
pascal@20689 13 """Operations on a mailbox"""
pascal@20689 14
pascal@20689 15 def __init__(self, host, port, username, password, remote_folder):
pascal@20689 16 + if port == '143':
pascal@20689 17 + self.mailbox = imaplib.IMAP4(host, port)
pascal@20689 18 + elif port == '993':
pascal@20689 19 self.mailbox = imaplib.IMAP4_SSL(host, port)
pascal@20689 20 self.mailbox.login(username, password)
pascal@20689 21 self.mailbox.select(remote_folder, readonly=True)
pascal@20689 22
pascal@20689 23 + if remote_folder=='INBOX':
pascal@20689 24 + self.remote_folder=''
pascal@20689 25 + else
pascal@20689 26 + r_f=re.sub('INBOX','',remote_folder)
pascal@20689 27 + if re.findall('^\.',r_f):
pascal@20689 28 + self.remote_folder=re.sub('\.','/',rf)
pascal@20689 29 + else:
pascal@20689 30 + self.remote_folder='/'+r_f
pascal@20689 31 +
pascal@20689 32 def copy_emails(self, days, local_folder, wkhtmltopdf):
pascal@20689 33
pascal@20689 34 n_saved = 0
pascal@20689 35 @@ -50,9 +64,19 @@
pascal@20689 36
pascal@20689 37
pascal@20689 38 def getEmailFolder(self, msg, data):
pascal@20689 39 - if msg['Message-Id']:
pascal@20689 40 - foldername = re.sub('[^a-zA-Z0-9_\-\.()\s]+', '', msg['Message-Id'])
pascal@20689 41 + if msg['To'] and re.findall('Sent',self.remote_folder):
pascal@20689 42 + foldername = re.findall("<.*>", msg['To'])
pascal@20689 43 + if foldername:
pascal@20689 44 + foldername = re.sub('[\<\>]+', '', foldername[0])
pascal@20689 45 else:
pascal@20689 46 + foldername=msg['To']
pascal@20689 47 + elif msg['From']:
pascal@20689 48 + foldername = re.findall("<.*>", msg['From'])
pascal@20689 49 + if foldername:
pascal@20689 50 + foldername = re.sub('[\<\>]+', '', foldername[0])
pascal@20689 51 + else:
pascal@20689 52 + foldername=msg['From']
pascal@20689 53 + else:
pascal@20689 54 foldername = hashlib.sha224(data).hexdigest()
pascal@20689 55
pascal@20689 56 year = 'None'
pascal@20689 57 @@ -62,20 +86,33 @@
pascal@20689 58 year = match.group(1)
pascal@20689 59
pascal@20689 60
pascal@20689 61 - return os.path.join(self.local_folder, year, foldername)
pascal@20689 62 + return os.path.join(self.local_folder+self.remote_folder, self.normalizeDate(msg['Date'])+'_'+foldername)
pascal@20689 63
pascal@20689 64 + def normalizeDate(self, datestr):
pascal@20689 65 + t = email.utils.parsedate_tz(datestr)
pascal@20689 66 + timeval = time.mktime(t[:-1])
pascal@20689 67 + date = email.utils.formatdate(timeval, True)
pascal@20689 68 + utc = time.gmtime(email.utils.mktime_tz(t))
pascal@20689 69 +# rfc2822 = '{} {:+03d}00'.format(date[:-6], t[9]//3600)
pascal@20689 70 + iso8601 = time.strftime('%Y%m%dT%H%M%SZ', utc)
pascal@20689 71
pascal@20689 72 + return (iso8601)
pascal@20689 73
pascal@20689 74 +
pascal@20689 75 def saveEmail(self, data):
pascal@20689 76 for response_part in data:
pascal@20689 77 if isinstance(response_part, tuple):
pascal@20689 78 - msg = email.message_from_string(response_part[1].decode("utf-8"))
pascal@20689 79 + try:
pascal@20689 80 + msg = email.message_from_string(re.sub('^>', '', response_part[1]).decode("utf-8")) # supprime '>' dans l'email
pascal@20689 81 + except UnicodeError:
pascal@20689 82 + msg = email.message_from_string(re.sub('^>', '', response_part[1]).decode('latin1').encode('utf-8')) # supprime '>' dans l'email
pascal@20689 83 directory = self.getEmailFolder(msg, data[0][1])
pascal@20689 84
pascal@20689 85 if os.path.exists(directory):
pascal@20689 86 return False
pascal@20689 87
pascal@20689 88 os.makedirs(directory)
pascal@20689 89 + os.utime(directory,(time.mktime(rfc822.parsedate(msg['Date'])), time.mktime(rfc822.parsedate(msg['Date']))))
pascal@20689 90
pascal@20689 91 try:
pascal@20689 92 message = Message(directory, msg)
pascal@20689 93 @@ -86,9 +123,12 @@
pascal@20689 94 if self.wkhtmltopdf:
pascal@20689 95 message.createPdfFile(self.wkhtmltopdf)
pascal@20689 96
pascal@20689 97 + os.utime(directory,(time.mktime(rfc822.parsedate(msg['Date'])), time.mktime(rfc822.parsedate(msg['Date']))))
pascal@20689 98 +
pascal@20689 99 except Exception as e:
pascal@20689 100 # ex: Unsupported charset on decode
pascal@20689 101 print(directory)
pascal@20689 102 + os.utime(directory,(time.mktime(rfc822.parsedate(msg['Date'])), time.mktime(rfc822.parsedate(msg['Date']))))
pascal@20689 103 if hasattr(e, 'strerror'):
pascal@20689 104 print("MailboxClient.saveEmail() failed:", e.strerror)
pascal@20689 105 else:
pascal@20689 106 --- imapbox/message.py
pascal@20689 107 +++ imapbox/message.py
pascal@20689 108 @@ -15,6 +15,8 @@
pascal@20689 109 import cgi
pascal@20689 110 import time
pascal@20689 111 import pkgutil
pascal@20689 112 +import rfc822
pascal@20689 113 +import sys
pascal@20689 114
pascal@20689 115 from six.moves import html_parser
pascal@20689 116
pascal@20689 117 @@ -82,14 +84,15 @@
pascal@20689 118 except email.Errors.HeaderParseError:
pascal@20689 119 # This already append in email.base64mime.decode()
pascal@20689 120 # instead return a sanitized ascii string
pascal@20689 121 - return header_text.encode('ascii', 'replace').decode('ascii')
pascal@20689 122 + return header_text.encode('ascii', 'replace').decode(default)
pascal@20689 123 else:
pascal@20689 124 for i, (text, charset) in enumerate(headers):
pascal@20689 125 headers[i]=text
pascal@20689 126 if charset:
pascal@20689 127 - headers[i]=str(text, charset)
pascal@20689 128 + text = unicode(text,charset)
pascal@20689 129 + headers[i]=text
pascal@20689 130 else:
pascal@20689 131 - headers[i]=str(text)
pascal@20689 132 + headers[i]=text.decode('utf-8')
pascal@20689 133 return u"".join(headers)
pascal@20689 134
pascal@20689 135
pascal@20689 136 @@ -102,21 +105,27 @@
pascal@20689 137 # use the same for both and see later
pascal@20689 138 name=addr
pascal@20689 139
pascal@20689 140 + headers=decode_header(name)
pascal@20689 141 try:
pascal@20689 142 - # address must be ascii only
pascal@20689 143 - addr=addr.encode('ascii')
pascal@20689 144 + addr = addr.encode("ascii")
pascal@20689 145 except UnicodeError:
pascal@20689 146 - addr=''
pascal@20689 147 + addr = ''
pascal@20689 148 else:
pascal@20689 149 # address must match adress regex
pascal@20689 150 if not email_address_re.match(addr.decode("utf-8")):
pascal@20689 151 addr=''
pascal@20689 152 - addrs[i]=(self.getmailheader(name), addr.decode("utf-8"))
pascal@20689 153 + addrs[i]=(self.getmailheader(name), "utf-8")
pascal@20689 154 return addrs
pascal@20689 155
pascal@20689 156 def getSubject(self):
pascal@20689 157 - if not hasattr(self, 'subject'):
pascal@20689 158 - self.subject = self.getmailheader(self.msg.get('Subject', ''))
pascal@20689 159 +# if not hasattr(self, 'subject'):
pascal@20689 160 +# self.subject = self.getmailheader(self.msg.get('Subject'),'')
pascal@20689 161 + headers=decode_header(self.msg.get('Subject'))
pascal@20689 162 + for i, (text, charset) in enumerate(headers):
pascal@20689 163 + if charset:
pascal@20689 164 + self.subject = unicode(text,charset)
pascal@20689 165 + else:
pascal@20689 166 + self.subject=text.decode('utf-8')
pascal@20689 167 return self.subject
pascal@20689 168
pascal@20689 169 def getFrom(self):
pascal@20689 170 @@ -172,6 +181,7 @@
pascal@20689 171 json_file.write(data)
pascal@20689 172
pascal@20689 173 json_file.close()
pascal@20689 174 + os.utime('%s/metadata.json' %(self.directory),(time.mktime(rfc822.parsedate(self.msg['Date'])), time.mktime(rfc822.parsedate(self.msg['Date']))))
pascal@20689 175
pascal@20689 176
pascal@20689 177
pascal@20689 178 @@ -180,6 +190,7 @@
pascal@20689 179 f = gzip.open('%s/raw.eml.gz' %(self.directory), 'wb')
pascal@20689 180 f.write(data)
pascal@20689 181 f.close()
pascal@20689 182 + os.utime('%s/raw.eml.gz' %(self.directory),(time.mktime(rfc822.parsedate(self.msg['Date'])), time.mktime(rfc822.parsedate(self.msg['Date']))))
pascal@20689 183
pascal@20689 184
pascal@20689 185 def getPartCharset(self, part):
pascal@20689 186 @@ -192,14 +203,23 @@
pascal@20689 187 if not hasattr(self, 'text_content'):
pascal@20689 188 self.text_content = ''
pascal@20689 189 for part in parts:
pascal@20689 190 + raw_content_0 = part.get_payload()
pascal@20689 191 raw_content = part.get_payload(decode=True)
pascal@20689 192 charset = self.getPartCharset(part)
pascal@20689 193 self.text_content += raw_content.decode(charset, "replace")
pascal@20689 194 + if charset!='utf-8' and self.text_content:
pascal@20689 195 + try:
pascal@20689 196 + self.text_content=self.text_content.encode('raw_unicode_escape').decode('utf-8') # Double-decoding unicode
pascal@20689 197 + except UnicodeError:
pascal@20689 198 + self.text_content=raw_content_0.decode('utf_8')
pascal@20689 199 return self.text_content
pascal@20689 200
pascal@20689 201
pascal@20689 202 def createTextFile(self, parts):
pascal@20689 203 - utf8_content = self.getTextContent(parts)
pascal@20689 204 + intro=''
pascal@20689 205 + if self.getSubject():
pascal@20689 206 + intro='==> ' + self.getSubject() + '\r\n\r\n'
pascal@20689 207 + utf8_content = intro + self.getTextContent(parts)
pascal@20689 208 with open(os.path.join(self.directory, 'message.txt'), 'wb') as fp:
pascal@20689 209 fp.write(bytearray(utf8_content, 'utf-8'))
pascal@20689 210
pascal@20689 211 @@ -208,9 +228,15 @@
pascal@20689 212 self.html_content = ''
pascal@20689 213
pascal@20689 214 for part in parts:
pascal@20689 215 + raw_content_0 = part.get_payload()
pascal@20689 216 raw_content = part.get_payload(decode=True)
pascal@20689 217 charset = self.getPartCharset(part)
pascal@20689 218 self.html_content += raw_content.decode(charset, "replace")
pascal@20689 219 + if charset!='utf-8' and self.html_content:
pascal@20689 220 + try:
pascal@20689 221 + self.html_content.encode('raw_unicode_escape').decode('utf-8') # Double-decoding unicode
pascal@20689 222 + except UnicodeError:
pascal@20689 223 + self.html_content=raw_content_0.decode('utf_8')
pascal@20689 224
pascal@20689 225 m = re.search('<body[^>]*>(.+)<\/body>', self.html_content, re.S | re.I)
pascal@20689 226 if (m != None):
pascal@20689 227 @@ -304,9 +330,11 @@
pascal@20689 228
pascal@20689 229 if message_parts['text']:
pascal@20689 230 self.createTextFile(message_parts['text'])
pascal@20689 231 + os.utime('%s/message.txt' %(self.directory),(time.mktime(rfc822.parsedate(self.msg['Date'])), time.mktime(rfc822.parsedate(self.msg['Date']))))
pascal@20689 232
pascal@20689 233 if message_parts['html']:
pascal@20689 234 self.createHtmlFile(message_parts['html'], message_parts['embed_images'])
pascal@20689 235 + os.utime('%s/message.html' %(self.directory),(time.mktime(rfc822.parsedate(self.msg['Date'])), time.mktime(rfc822.parsedate(self.msg['Date']))))
pascal@20689 236
pascal@20689 237 if message_parts['files']:
pascal@20689 238 attdir = os.path.join(self.directory, 'attachments')
pascal@20689 239 @@ -317,6 +345,8 @@
pascal@20689 240 payload = afile[0].get_payload(decode=True)
pascal@20689 241 if payload:
pascal@20689 242 fp.write(payload)
pascal@20689 243 + os.utime('%s/attachments/%s' %(self.directory,afile[1]),(time.mktime(rfc822.parsedate(self.msg['Date'])), time.mktime(rfc822.parsedate(self.msg['Date']))))
pascal@20689 244 + os.utime('%s/attachments' %(self.directory),(time.mktime(rfc822.parsedate(self.msg['Date'])), time.mktime(rfc822.parsedate(self.msg['Date']))))
pascal@20689 245
pascal@20689 246
pascal@20689 247 def createPdfFile(self, wkhtmltopdf):
pascal@20689 248 @@ -325,5 +355,6 @@
pascal@20689 249 pdf_path = os.path.join(self.directory, 'message.pdf')
pascal@20689 250 config = pdfkit.configuration(wkhtmltopdf=wkhtmltopdf)
pascal@20689 251 pdfkit.from_file(html_path, pdf_path, configuration=config)
pascal@20689 252 + os.utime('%s/message.pdf' %(self.directory),(time.mktime(rfc822.parsedate(self.msg['Date'])), time.mktime(rfc822.parsedate(self.msg['Date']))))
pascal@20689 253 else:
pascal@20689 254 print("Couldn't create PDF message, since \"pdfkit\" module isn't installed.")