From 263a45e2a52e4c7a620ad7755d8814183112be9f Mon Sep 17 00:00:00 2001 From: Markus Unterwaditzer Date: Wed, 10 Jun 2015 18:06:39 +0200 Subject: [PATCH 1/2] Fix unicode URLs --- tests/storage/dav/test_main.py | 13 ++++++++++++- vdirsyncer/storage/dav.py | 10 +++++----- vdirsyncer/storage/filesystem.py | 15 ++++++--------- vdirsyncer/utils/__init__.py | 12 +++++++++++- vdirsyncer/utils/compat.py | 27 ++++++++++++++++++++++++--- 5 files changed, 58 insertions(+), 19 deletions(-) diff --git a/tests/storage/dav/test_main.py b/tests/storage/dav/test_main.py index 1ac356f..e37f240 100644 --- a/tests/storage/dav/test_main.py +++ b/tests/storage/dav/test_main.py @@ -9,7 +9,8 @@ import pytest import requests import requests.exceptions -from tests import EVENT_TEMPLATE, TASK_TEMPLATE, VCARD_TEMPLATE +from tests import EVENT_TEMPLATE, TASK_TEMPLATE, VCARD_TEMPLATE, \ + assert_item_equals import vdirsyncer.exceptions as exceptions from vdirsyncer.storage.base import Item @@ -48,6 +49,16 @@ class DavStorageTests(ServerMixin, StorageTests): assert list(s.get_multi([])) == [] + def test_dav_unicode_href(self, s, get_item, monkeypatch): + if self.dav_server != 'radicale': + # Radicale is unable to deal with unicode hrefs + monkeypatch.setattr(s, '_get_href', + lambda item: item.ident + s.fileext) + item = get_item(uid=u'lolätvdirsynceröü град сатану') + href, etag = s.upload(item) + item2, etag2 = s.get(href) + assert_item_equals(item, item2) + class TestCaldavStorage(DavStorageTests): storage_class = CaldavStorage diff --git a/vdirsyncer/storage/dav.py b/vdirsyncer/storage/dav.py index 1f73ba9..30142e3 100644 --- a/vdirsyncer/storage/dav.py +++ b/vdirsyncer/storage/dav.py @@ -11,7 +11,7 @@ from .base import Item, Storage from .http import HTTP_STORAGE_PARAMETERS, USERAGENT, prepare_auth, \ prepare_client_cert, prepare_verify from .. import exceptions, log, utils -from ..utils.compat import text_type +from ..utils.compat import text_type, to_native dav_logger = log.get(__name__) @@ -22,6 +22,8 @@ CALDAV_DT_FORMAT = '%Y%m%dT%H%M%SZ' def _normalize_href(base, href): '''Normalize the href to be a path only relative to hostname and schema.''' + base = to_native(base, 'utf-8') + href = to_native(href, 'utf-8') if not href: raise ValueError(href) x = utils.compat.urlparse.urljoin(base, href) @@ -363,9 +365,7 @@ class DavStorage(Storage): return _normalize_href(self.session.url, *args, **kwargs) def _get_href(self, item): - href = item.ident - for char in self.unsafe_href_chars: - href = href.replace(char, '_') + href = utils.generate_href(item.ident, unsafe=self.unsafe_href_chars) return self._normalize_href(href + self.fileext) def _is_item_mimetype(self, mimetype): @@ -440,7 +440,7 @@ class DavStorage(Storage): headers=headers ) etag = response.headers.get('etag', None) - href = self._normalize_href(_decode_url(response.url)) + href = _decode_url(self._normalize_href(response.url)) if not etag: dav_logger.warning('Race condition detected with server {}, ' 'consider using an alternative.' diff --git a/vdirsyncer/storage/filesystem.py b/vdirsyncer/storage/filesystem.py index 68658f0..63b7f1c 100644 --- a/vdirsyncer/storage/filesystem.py +++ b/vdirsyncer/storage/filesystem.py @@ -3,13 +3,12 @@ import errno import os import subprocess -import uuid from atomicwrites import atomic_write from .base import Item, Storage from .. import exceptions, log -from ..utils import checkdir, expand_path, get_etag_from_file, \ +from ..utils import checkdir, expand_path, generate_href, get_etag_from_file, \ get_etag_from_fileobject from ..utils.compat import text_type @@ -81,13 +80,10 @@ class FilesystemStorage(Storage): def _get_filepath(self, href): return os.path.join(self.path, href) - def _deterministic_href(self, item): + def _get_href(self, ident): # XXX: POSIX only defines / and \0 as invalid chars, but we should make # this work crossplatform. - return item.ident.replace('/', '_') + self.fileext - - def _random_href(self): - return str(uuid.uuid4()) + self.fileext + return generate_href(ident, '/') + self.fileext def list(self): for fname in os.listdir(self.path): @@ -112,7 +108,7 @@ class FilesystemStorage(Storage): raise TypeError('item.raw must be a unicode string.') try: - href = self._deterministic_href(item) + href = self._get_href(item.ident) fpath, etag = self._upload_impl(item, href) except OSError as e: if e.errno in ( @@ -121,7 +117,8 @@ class FilesystemStorage(Storage): ): logger.debug('UID as filename rejected, trying with random ' 'one.') - href = self._random_href() + # random href instead of UID-based + href = self._get_href(None) fpath, etag = self._upload_impl(item, href) else: raise diff --git a/vdirsyncer/utils/__init__.py b/vdirsyncer/utils/__init__.py index d114b92..8f88311 100644 --- a/vdirsyncer/utils/__init__.py +++ b/vdirsyncer/utils/__init__.py @@ -2,8 +2,9 @@ import os import sys +import uuid -from .compat import iteritems +from .compat import iteritems, to_unicode from .. import exceptions @@ -156,3 +157,12 @@ class cached_property(object): return self obj.__dict__[self.__name__] = result = self.fget(obj) return result + + +def generate_href(ident=None, unsafe=''): + if ident and \ + ident.encode('ascii', 'ignore').decode('ascii') == ident: + for char in unsafe: + ident = ident.replace(char, '_') + return ident + return to_unicode(uuid.uuid4().hex) diff --git a/vdirsyncer/utils/compat.py b/vdirsyncer/utils/compat.py index 840bc52..789ade6 100644 --- a/vdirsyncer/utils/compat.py +++ b/vdirsyncer/utils/compat.py @@ -10,14 +10,34 @@ if sys.version_info < (3, 3) and sys.version_info[:2] != (2, 7): ) +def to_unicode(x, encoding='ascii'): + if not isinstance(x, text_type): + x = x.decode(encoding) + return x + + +def to_bytes(x, encoding='ascii'): + if not isinstance(x, bytes): + x = x.encode(encoding) + return x + + if PY2: # pragma: no cover import urlparse - from urllib import \ - quote as urlquote, \ - unquote as urlunquote + import urllib as _urllib + + # Horrible hack to make urllib play nice with u'...' urls from requests + def urlquote(x, *a, **kw): + return _urllib.quote(to_native(x, 'utf-8'), *a, **kw) + + def urlunquote(x, *a, **kw): + return _urllib.unquote(to_native(x, 'utf-8'), *a, **kw) + text_type = unicode # flake8: noqa iteritems = lambda x: x.iteritems() itervalues = lambda x: x.itervalues() + to_native = to_bytes + else: # pragma: no cover import urllib.parse as urlparse urlquote = urlparse.quote @@ -25,6 +45,7 @@ else: # pragma: no cover text_type = str iteritems = lambda x: x.items() itervalues = lambda x: x.values() + to_native = to_unicode def with_metaclass(meta, *bases): From 237aacee7d603bdd9ec58159b8f4abd68fba6b29 Mon Sep 17 00:00:00 2001 From: Markus Unterwaditzer Date: Thu, 11 Jun 2015 12:56:02 +0200 Subject: [PATCH 2/2] Deal with unicode UIDs in sync --- tests/test_sync.py | 8 ++++++++ vdirsyncer/sync.py | 18 +++++++++--------- 2 files changed, 17 insertions(+), 9 deletions(-) diff --git a/tests/test_sync.py b/tests/test_sync.py index a4ee73a..60c6192 100644 --- a/tests/test_sync.py +++ b/tests/test_sync.py @@ -311,3 +311,11 @@ def test_moved_href(): assert len(status) == 1 assert len(list(a.list())) == len(list(b.list())) == 1 assert status['haha'][2] == 'haha' + + +def test_unicode_hrefs(): + a = MemoryStorage() + b = MemoryStorage() + status = {} + href, etag = a.upload(Item(u'UID:äää')) + sync(a, b, status) diff --git a/vdirsyncer/sync.py b/vdirsyncer/sync.py index 724ed0d..ae9edac 100644 --- a/vdirsyncer/sync.py +++ b/vdirsyncer/sync.py @@ -184,7 +184,7 @@ def sync(storage_a, storage_b, status, conflict_resolution=None, def _action_upload(ident, source, dest): def inner(a, b, conflict_resolution): - sync_logger.info('Copying (uploading) item {} to {}' + sync_logger.info(u'Copying (uploading) item {} to {}' .format(ident, dest.storage)) source_meta = source.idents[ident] @@ -205,12 +205,12 @@ def _action_upload(ident, source, dest): def _action_update(ident, source, dest): def inner(a, b, conflict_resolution): - sync_logger.info('Copying (updating) item {} to {}' + sync_logger.info(u'Copying (updating) item {} to {}' .format(ident, dest.storage)) source_meta = source.idents[ident] if dest.storage.read_only: - sync_logger.info('{dest} is read-only. Skipping update...' + sync_logger.info(u'{dest} is read-only. Skipping update...' .format(dest=dest.storage)) dest_href = dest_etag = None else: @@ -231,7 +231,7 @@ def _action_delete(ident, info): idents = info.idents def inner(a, b, conflict_resolution): - sync_logger.info('Deleting item {} from {}'.format(ident, storage)) + sync_logger.info(u'Deleting item {} from {}'.format(ident, storage)) if storage.read_only: sync_logger.warning('{} is read-only, skipping deletion...' .format(storage)) @@ -249,7 +249,7 @@ def _action_delete(ident, info): def _action_delete_status(ident): def inner(a, b, conflict_resolution): - sync_logger.info('Deleting status info for nonexisting item {}' + sync_logger.info(u'Deleting status info for nonexisting item {}' .format(ident)) del a.status[ident] del b.status[ident] @@ -259,23 +259,23 @@ def _action_delete_status(ident): def _action_conflict_resolve(ident): def inner(a, b, conflict_resolution): - sync_logger.info('Doing conflict resolution for item {}...' + sync_logger.info(u'Doing conflict resolution for item {}...' .format(ident)) meta_a = a.idents[ident] meta_b = b.idents[ident] if meta_a['item'].raw == meta_b['item'].raw: - sync_logger.info('...same content on both sides.') + sync_logger.info(u'...same content on both sides.') a.status[ident] = meta_a['href'], meta_a['etag'] b.status[ident] = meta_b['href'], meta_b['etag'] elif conflict_resolution is None: raise SyncConflict(ident=ident, href_a=meta_a['href'], href_b=meta_b['href']) elif conflict_resolution == 'a wins': - sync_logger.info('...{} wins.'.format(a.storage)) + sync_logger.info(u'...{} wins.'.format(a.storage)) _action_update(ident, a, b)(a, b, conflict_resolution) elif conflict_resolution == 'b wins': - sync_logger.info('...{} wins.'.format(b.storage)) + sync_logger.info(u'...{} wins.'.format(b.storage)) _action_update(ident, b, a)(a, b, conflict_resolution) else: raise exceptions.UserError('Invalid conflict resolution mode: {}'