From dd5f76ca5dec2af3e15f87415ed8deb977d3eca2 Mon Sep 17 00:00:00 2001 From: Markus Unterwaditzer Date: Sun, 25 Sep 2016 13:42:37 +0200 Subject: [PATCH] ignore UIDs in http storage --- CHANGELOG.rst | 2 ++ tests/storage/test_http.py | 4 ++-- tests/storage/test_http_with_singlefile.py | 1 + vdirsyncer/repair.py | 23 ++++++---------------- vdirsyncer/storage/http.py | 19 ++++++++++++++++-- vdirsyncer/utils/vobject.py | 17 +++++++++++++++- 6 files changed, 44 insertions(+), 22 deletions(-) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index bea9d5f..9cd8b74 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -19,6 +19,8 @@ Version 0.13.0 console output is always unambigous. See :gh:`459`. - Custom commands can now be used for conflict resolution during sync. See :gh:`127`. +- :storage:`http` now completely ignores UIDs. This avoids a lot of unnecessary + down- and uploads. Version 0.12.1 ============== diff --git a/tests/storage/test_http.py b/tests/storage/test_http.py index 530c4d0..7c3fac6 100644 --- a/tests/storage/test_http.py +++ b/tests/storage/test_http.py @@ -54,7 +54,7 @@ def test_list(monkeypatch): for href, etag in s.list(): item, etag2 = s.get(href) - assert item.uid is None + assert item.uid is not None assert etag2 == etag found_items[normalize_item(item)] = href @@ -65,7 +65,7 @@ def test_list(monkeypatch): for href, etag in s.list(): item, etag2 = s.get(href) - assert item.uid is None + assert item.uid is not None assert etag2 == etag assert found_items[normalize_item(item)] == href diff --git a/tests/storage/test_http_with_singlefile.py b/tests/storage/test_http_with_singlefile.py index 554bcf3..b8bb8bd 100644 --- a/tests/storage/test_http_with_singlefile.py +++ b/tests/storage/test_http_with_singlefile.py @@ -24,6 +24,7 @@ class CombinedStorage(Storage): self.url = url self.path = path self._reader = vdirsyncer.storage.http.HttpStorage(url=url) + self._reader._ignore_uids = False self._writer = SingleFileStorage(path=path) def list(self, *a, **kw): diff --git a/vdirsyncer/repair.py b/vdirsyncer/repair.py index d0b2ce0..b0f1bdd 100644 --- a/vdirsyncer/repair.py +++ b/vdirsyncer/repair.py @@ -17,6 +17,8 @@ def repair_storage(storage): logger.info(u'[{}/{}] Processing {}' .format(i, len(all_hrefs), href)) + new_item = item + changed = False if item.parsed is None: logger.warning('Item {} can\'t be parsed, skipping.' @@ -25,15 +27,14 @@ def repair_storage(storage): if not item.uid: logger.warning('No UID, assigning random one.') - changed = change_uid(item, generate_href()) or changed + new_item = item.with_uid(generate_href()) elif item.uid in seen_uids: logger.warning('Duplicate UID, assigning random one.') - changed = change_uid(item, generate_href()) or changed + new_item = item.with_uid(generate_href()) elif not href_safe(item.uid) or not href_safe(basename(href)): logger.warning('UID or href is unsafe, assigning random UID.') - changed = change_uid(item, generate_href(item.uid)) or changed + new_item = item.with_uid(generate_href(item.uid)) - new_item = Item(u'\r\n'.join(item.parsed.dump_lines())) if not new_item.uid: logger.error('Item {!r} is malformed beyond repair. ' 'This is a serverside bug.' @@ -42,7 +43,7 @@ def repair_storage(storage): continue seen_uids.add(new_item.uid) - if changed: + if new_item.raw != item.raw: try: if new_item.uid != item.uid: storage.upload(new_item) @@ -53,15 +54,3 @@ def repair_storage(storage): logger.exception('Server rejected new item.') -def change_uid(item, new_uid): - stack = [item.parsed] - changed = False - while stack: - component = stack.pop() - stack.extend(component.subcomponents) - - if component.name in ('VEVENT', 'VTODO', 'VJOURNAL', 'VCARD'): - component['UID'] = new_uid - changed = True - - return changed diff --git a/vdirsyncer/storage/http.py b/vdirsyncer/storage/http.py index 7731c74..6c29a9c 100644 --- a/vdirsyncer/storage/http.py +++ b/vdirsyncer/storage/http.py @@ -95,6 +95,16 @@ HTTP_STORAGE_PARAMETERS = ''' class HttpStorage(Storage): __doc__ = ''' Use a simple ``.ics`` file (or similar) from the web. + ``webcal://``-calendars are supposed to be used with this, but you have to + replace ``webcal://`` with ``http://``, or better, ``https://``. + + Too many WebCAL providers generate UIDs of all ``VEVENT``-components + on-the-fly, i.e. all UIDs change every time the calendar is downloaded. + This leads many synchronization programs to believe that all events have + been deleted and new ones created, and accordingly causes a lot of + unnecessary uploads and deletions on the other side. Vdirsyncer completely + ignores UIDs coming from :storage:`http` and will replace them with a hash + of the normalized item content. :param url: URL to the ``.ics`` file. ''' + HTTP_STORAGE_PARAMETERS + ''' @@ -121,6 +131,9 @@ class HttpStorage(Storage): _repr_attributes = ('username', 'url') _items = None + # Required for tests. + _ignore_uids = True + def __init__(self, url, username='', password='', verify=True, auth=None, useragent=USERAGENT, verify_fingerprint=None, auth_cert=None, **kwargs): @@ -152,8 +165,10 @@ class HttpStorage(Storage): for item in split_collection(r.text): item = Item(item) - etag = item.hash - self._items[item.ident] = item, etag + if self._ignore_uids: + item = item.with_uid(item.hash) + + self._items[item.ident] = item, item.hash return ((href, etag) for href, (item, etag) in self._items.items()) diff --git a/vdirsyncer/utils/vobject.py b/vdirsyncer/utils/vobject.py index 21ed306..91bef69 100644 --- a/vdirsyncer/utils/vobject.py +++ b/vdirsyncer/utils/vobject.py @@ -40,6 +40,20 @@ class Item(object): assert isinstance(raw, str) self._raw = raw + def with_uid(self, new_uid): + parsed = _Component.parse(self.raw) + stack = [parsed] + while stack: + component = stack.pop() + stack.extend(component.subcomponents) + + if component.name in ('VEVENT', 'VTODO', 'VJOURNAL', 'VCARD'): + del component['UID'] + if new_uid: + component['UID'] = new_uid + + return Item('\r\n'.join(parsed.dump_lines())) + @cached_property def raw(self): '''Raw content of the item, as unicode string. @@ -79,8 +93,9 @@ class Item(object): # 2. The status file would contain really sensitive information. return self.uid or self.hash - @cached_property + @property def parsed(self): + '''Don't cache because the rv is mutable.''' try: return _Component.parse(self.raw) except Exception: