From 10148f47f8eff539bd43a0b401242f4a2ba1a36c Mon Sep 17 00:00:00 2001 From: Markus Unterwaditzer Date: Sun, 26 Jul 2015 10:06:39 +0200 Subject: [PATCH] repair: Also fix hrefs, stricter safe-set Fix #236 --- tests/cli/test_repair.py | 22 +++++++++++++++++----- vdirsyncer/repair.py | 26 +++++++++++++------------- vdirsyncer/utils/__init__.py | 12 +++++++++++- 3 files changed, 41 insertions(+), 19 deletions(-) diff --git a/tests/cli/test_repair.py b/tests/cli/test_repair.py index 8d253b5..c1987c6 100644 --- a/tests/cli/test_repair.py +++ b/tests/cli/test_repair.py @@ -2,8 +2,11 @@ from textwrap import dedent +import pytest + from vdirsyncer.repair import repair_storage from vdirsyncer.storage.memory import MemoryStorage +from vdirsyncer.utils import href_safe from vdirsyncer.utils.vobject import Item @@ -23,15 +26,24 @@ def test_repair_uids(): assert uid1 != uid2 -def test_repair_nonascii_uids(): +@pytest.mark.parametrize('uid', [ + u'äää', + u'test@foo', + u'test foo', +]) +def test_repair_unsafe_uids(uid): + assert not href_safe(uid) + s = MemoryStorage() - href, etag = s.upload(Item(u'BEGIN:VCARD\nUID:äää\nEND:VCARD')) - assert s.get(href)[0].uid == u'äää' + href, etag = s.upload(Item(u'BEGIN:VCARD\nUID:{}\nEND:VCARD'.format(uid))) + assert s.get(href)[0].uid == uid + repair_storage(s) + new_href = list(s.list())[0][0] + assert href_safe(new_href) newuid = s.get(new_href)[0].uid - assert newuid != u'äää' - assert newuid.encode('ascii', 'ignore').decode('ascii') == newuid + assert href_safe(newuid) def test_full(tmpdir, runner): diff --git a/vdirsyncer/repair.py b/vdirsyncer/repair.py index bfb0f0e..1f3fd6f 100644 --- a/vdirsyncer/repair.py +++ b/vdirsyncer/repair.py @@ -1,8 +1,10 @@ # -*- coding: utf-8 -*- -import uuid +from os.path import basename + from . import log +from .utils import generate_href, href_safe from .utils.vobject import Item logger = log.get(__name__) @@ -16,24 +18,23 @@ def repair_storage(storage): logger.info(u'[{}/{}] Processing {}' .format(i, len(all_hrefs), href)) - parsed = item.parsed changed = False - if parsed is None: + if item.parsed is None: logger.warning('Item {} can\'t be parsed, skipping.' .format(href)) continue - if item.uid is None: + if not item.uid: logger.warning('No UID, assigning random one.') - changed = reroll_uid(parsed) or changed + changed = change_uid(item, generate_href()) or changed elif item.uid in seen_uids: logger.warning('Duplicate UID, assigning random one.') - changed = reroll_uid(parsed) or changed - elif item.uid.encode('ascii', 'ignore').decode('ascii') != item.uid: - logger.warning('UID is non-ascii, assigning random one.') - changed = reroll_uid(parsed) or changed + changed = change_uid(item, generate_href()) or changed + elif not href_safe(item.uid) or not href_safe(basename(href)): + logger.warning('UID or href is unsafe, assigning random UID.') + changed = change_uid(item, generate_href(item.uid)) or changed - new_item = Item(u'\r\n'.join(parsed.dump_lines())) + new_item = Item(u'\r\n'.join(item.parsed.dump_lines())) assert new_item.uid seen_uids.add(new_item.uid) if changed: @@ -47,9 +48,8 @@ def repair_storage(storage): logger.exception('Server rejected new item.') -def reroll_uid(component): - new_uid = uuid.uuid4() - stack = [component] +def change_uid(item, new_uid): + stack = [item.parsed] changed = False while stack: component = stack.pop() diff --git a/vdirsyncer/utils/__init__.py b/vdirsyncer/utils/__init__.py index 5f926a4..93d1a19 100644 --- a/vdirsyncer/utils/__init__.py +++ b/vdirsyncer/utils/__init__.py @@ -164,8 +164,18 @@ class cached_property(object): return result +def href_safe(ident, safe=SAFE_UID_CHARS): + return not bool(set(ident) - set(safe)) + + def generate_href(ident=None, safe=SAFE_UID_CHARS): - if not ident or set(ident) - set(safe): + ''' + Generate a safe identifier, suitable for URLs, storage hrefs or UIDs. + + If the given ident string is safe, it will be returned, otherwise a random + UUID. + ''' + if not ident or not href_safe(ident, safe): return to_unicode(uuid.uuid4().hex) else: return ident