Improve performance on large folders by 10x or more

IMAP allows fetching the Message-Id headers of all messages in a folder
with a single request, which is much faster than making one request per
message. Running this on a folder with 4441 messages now takes 26s for
me, compared to over 8 minutes before this change.
This commit is contained in:
Sami Samhuri 2022-01-01 16:56:43 -08:00
parent 0cc27cd570
commit 92cd9fb6cb
No known key found for this signature in database
GPG key ID: 4B4195422742FC16

View file

@ -267,15 +267,29 @@ def scan_folder(server, foldername, nospinner):
raise SkipFolderException("SELECT failed: %s" % data) raise SkipFolderException("SELECT failed: %s" % data)
num_msgs = int(data[0]) num_msgs = int(data[0])
# each message # Retrieve all Message-Id headers, making sure we don't mark all messages as read.
for num in range(1, num_msgs+1): #
# Retrieve Message-Id, making sure we don't mark all messages as read # The result is an array of result tuples with a terminating closing parenthesis
typ, data = server.fetch(str(num), '(BODY.PEEK[HEADER.FIELDS (MESSAGE-ID)])') # after each tuple. That means that the first result is at index 0, the second at
# 2, third at 4, and so on.
#
# e.g.
# [
# (b'1 (BODY[...', b'Message-Id: ...'), b')', # indices 0 and 1
# (b'2 (BODY[...', b'Message-Id: ...'), b')', # indices 2 and 3
# ...
# ]
if num_msgs > 0:
typ, data = server.fetch(f'1:{num_msgs}', '(BODY.PEEK[HEADER.FIELDS (MESSAGE-ID)])')
if 'OK' != typ: if 'OK' != typ:
raise SkipFolderException("FETCH %s failed: %s" % (num, data)) raise SkipFolderException("FETCH failed: %s" % (data))
data_str = str(data[0][1], 'utf-8', 'replace') # each message
for i in range(0, num_msgs):
num = 1 + i
# Double the index because of the terminating parenthesis after each tuple.
data_str = str(data[2 * i][1], 'utf-8', 'replace')
header = data_str.strip() header = data_str.strip()
# remove newlines inside Message-Id (a dumb Exchange trait) # remove newlines inside Message-Id (a dumb Exchange trait)
@ -288,12 +302,12 @@ def scan_folder(server, foldername, nospinner):
except (IndexError, AttributeError): except (IndexError, AttributeError):
# Some messages may have no Message-Id, so we'll synthesise one # Some messages may have no Message-Id, so we'll synthesise one
# (this usually happens with Sent, Drafts and .Mac news) # (this usually happens with Sent, Drafts and .Mac news)
typ, data = server.fetch( msg_typ, msg_data = server.fetch(
str(num), '(BODY[HEADER.FIELDS (FROM TO CC DATE SUBJECT)])') str(num), '(BODY[HEADER.FIELDS (FROM TO CC DATE SUBJECT)])')
if 'OK' != typ: if 'OK' != msg_typ:
raise SkipFolderException( raise SkipFolderException(
"FETCH %s failed: %s" % (num, data)) "FETCH %s failed: %s" % (num, msg_data))
data_str = str(data[0][1], 'utf-8', 'replace') data_str = str(msg_data[0][1], 'utf-8', 'replace')
header = data_str.strip() header = data_str.strip()
header = header.replace('\r\n', '\t').encode('utf-8') header = header.replace('\r\n', '\t').encode('utf-8')
messages['<' + UUID + '.' + messages['<' + UUID + '.' +