repair: Also fix hrefs, stricter safe-set

Fix #236
This commit is contained in:
Markus Unterwaditzer 2015-07-26 10:06:39 +02:00
parent 93480c059f
commit 10148f47f8
3 changed files with 41 additions and 19 deletions

View file

@ -2,8 +2,11 @@
from textwrap import dedent
import pytest
from vdirsyncer.repair import repair_storage
from vdirsyncer.storage.memory import MemoryStorage
from vdirsyncer.utils import href_safe
from vdirsyncer.utils.vobject import Item
@ -23,15 +26,24 @@ def test_repair_uids():
assert uid1 != uid2
def test_repair_nonascii_uids():
@pytest.mark.parametrize('uid', [
u'äää',
u'test@foo',
u'test foo',
])
def test_repair_unsafe_uids(uid):
assert not href_safe(uid)
s = MemoryStorage()
href, etag = s.upload(Item(u'BEGIN:VCARD\nUID:äää\nEND:VCARD'))
assert s.get(href)[0].uid == u'äää'
href, etag = s.upload(Item(u'BEGIN:VCARD\nUID:{}\nEND:VCARD'.format(uid)))
assert s.get(href)[0].uid == uid
repair_storage(s)
new_href = list(s.list())[0][0]
assert href_safe(new_href)
newuid = s.get(new_href)[0].uid
assert newuid != u'äää'
assert newuid.encode('ascii', 'ignore').decode('ascii') == newuid
assert href_safe(newuid)
def test_full(tmpdir, runner):

View file

@ -1,8 +1,10 @@
# -*- coding: utf-8 -*-
import uuid
from os.path import basename
from . import log
from .utils import generate_href, href_safe
from .utils.vobject import Item
logger = log.get(__name__)
@ -16,24 +18,23 @@ def repair_storage(storage):
logger.info(u'[{}/{}] Processing {}'
.format(i, len(all_hrefs), href))
parsed = item.parsed
changed = False
if parsed is None:
if item.parsed is None:
logger.warning('Item {} can\'t be parsed, skipping.'
.format(href))
continue
if item.uid is None:
if not item.uid:
logger.warning('No UID, assigning random one.')
changed = reroll_uid(parsed) or changed
changed = change_uid(item, generate_href()) or changed
elif item.uid in seen_uids:
logger.warning('Duplicate UID, assigning random one.')
changed = reroll_uid(parsed) or changed
elif item.uid.encode('ascii', 'ignore').decode('ascii') != item.uid:
logger.warning('UID is non-ascii, assigning random one.')
changed = reroll_uid(parsed) or changed
changed = change_uid(item, generate_href()) or changed
elif not href_safe(item.uid) or not href_safe(basename(href)):
logger.warning('UID or href is unsafe, assigning random UID.')
changed = change_uid(item, generate_href(item.uid)) or changed
new_item = Item(u'\r\n'.join(parsed.dump_lines()))
new_item = Item(u'\r\n'.join(item.parsed.dump_lines()))
assert new_item.uid
seen_uids.add(new_item.uid)
if changed:
@ -47,9 +48,8 @@ def repair_storage(storage):
logger.exception('Server rejected new item.')
def reroll_uid(component):
new_uid = uuid.uuid4()
stack = [component]
def change_uid(item, new_uid):
stack = [item.parsed]
changed = False
while stack:
component = stack.pop()

View file

@ -164,8 +164,18 @@ class cached_property(object):
return result
def href_safe(ident, safe=SAFE_UID_CHARS):
return not bool(set(ident) - set(safe))
def generate_href(ident=None, safe=SAFE_UID_CHARS):
if not ident or set(ident) - set(safe):
'''
Generate a safe identifier, suitable for URLs, storage hrefs or UIDs.
If the given ident string is safe, it will be returned, otherwise a random
UUID.
'''
if not ident or not href_safe(ident, safe):
return to_unicode(uuid.uuid4().hex)
else:
return ident