mirror of
https://github.com/samsonjs/imapbackup.git
synced 2026-04-27 14:57:44 +00:00
Improve performance on large folders by 10x or more
IMAP allows fetching the Message-Id headers for all messages in a folder with a single request, which is quite a bit faster than making an individual request per message. Running this on a folder with 4441 messages now takes 26s for me, compared to over 8 minutes before this change.
This commit is contained in:
parent
0cc27cd570
commit
92cd9fb6cb
1 changed file with 25 additions and 11 deletions
|
|
@@ -267,15 +267,29 @@ def scan_folder(server, foldername, nospinner):
|
||||||
raise SkipFolderException("SELECT failed: %s" % data)
|
raise SkipFolderException("SELECT failed: %s" % data)
|
||||||
num_msgs = int(data[0])
|
num_msgs = int(data[0])
|
||||||
|
|
||||||
# each message
|
# Retrieve all Message-Id headers, making sure we don't mark all messages as read.
|
||||||
for num in range(1, num_msgs+1):
|
#
|
||||||
# Retrieve Message-Id, making sure we don't mark all messages as read
|
# The result is an array of result tuples with a terminating closing parenthesis
|
||||||
typ, data = server.fetch(str(num), '(BODY.PEEK[HEADER.FIELDS (MESSAGE-ID)])')
|
# after each tuple. That means that the first result is at index 0, the second at
|
||||||
|
# 2, third at 4, and so on.
|
||||||
|
#
|
||||||
|
# e.g.
|
||||||
|
# [
|
||||||
|
# (b'1 (BODY[...', b'Message-Id: ...'), b')', # indices 0 and 1
|
||||||
|
# (b'2 (BODY[...', b'Message-Id: ...'), b')', # indices 2 and 3
|
||||||
|
# ...
|
||||||
|
# ]
|
||||||
|
if num_msgs > 0:
|
||||||
|
typ, data = server.fetch(f'1:{num_msgs}', '(BODY.PEEK[HEADER.FIELDS (MESSAGE-ID)])')
|
||||||
if 'OK' != typ:
|
if 'OK' != typ:
|
||||||
raise SkipFolderException("FETCH %s failed: %s" % (num, data))
|
raise SkipFolderException("FETCH failed: %s" % (data))
|
||||||
|
|
||||||
data_str = str(data[0][1], 'utf-8', 'replace')
|
# each message
|
||||||
|
for i in range(0, num_msgs):
|
||||||
|
num = 1 + i
|
||||||
|
|
||||||
|
# Double the index because of the terminating parenthesis after each tuple.
|
||||||
|
data_str = str(data[2 * i][1], 'utf-8', 'replace')
|
||||||
header = data_str.strip()
|
header = data_str.strip()
|
||||||
|
|
||||||
# remove newlines inside Message-Id (a dumb Exchange trait)
|
# remove newlines inside Message-Id (a dumb Exchange trait)
|
||||||
|
|
@@ -288,12 +302,12 @@ def scan_folder(server, foldername, nospinner):
|
||||||
except (IndexError, AttributeError):
|
except (IndexError, AttributeError):
|
||||||
# Some messages may have no Message-Id, so we'll synthesise one
|
# Some messages may have no Message-Id, so we'll synthesise one
|
||||||
# (this usually happens with Sent, Drafts and .Mac news)
|
# (this usually happens with Sent, Drafts and .Mac news)
|
||||||
typ, data = server.fetch(
|
msg_typ, msg_data = server.fetch(
|
||||||
str(num), '(BODY[HEADER.FIELDS (FROM TO CC DATE SUBJECT)])')
|
str(num), '(BODY[HEADER.FIELDS (FROM TO CC DATE SUBJECT)])')
|
||||||
if 'OK' != typ:
|
if 'OK' != msg_typ:
|
||||||
raise SkipFolderException(
|
raise SkipFolderException(
|
||||||
"FETCH %s failed: %s" % (num, data))
|
"FETCH %s failed: %s" % (num, msg_data))
|
||||||
data_str = str(data[0][1], 'utf-8', 'replace')
|
data_str = str(msg_data[0][1], 'utf-8', 'replace')
|
||||||
header = data_str.strip()
|
header = data_str.strip()
|
||||||
header = header.replace('\r\n', '\t').encode('utf-8')
|
header = header.replace('\r\n', '\t').encode('utf-8')
|
||||||
messages['<' + UUID + '.' +
|
messages['<' + UUID + '.' +
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue