Fix unicode URLs

2026-04-27 14:57:41 +00:00 · 2015-06-10 18:06:39 +02:00 · 2015-06-10 18:06:39 +02:00 · 263a45e2a5
commit 263a45e2a5
parent a07c9bea44
5 changed files with 58 additions and 19 deletions
--- a/tests/storage/dav/test_main.py
+++ b/tests/storage/dav/test_main.py
@ -9,7 +9,8 @@ import pytest
 import requests
 import requests.exceptions

-from tests import EVENT_TEMPLATE, TASK_TEMPLATE, VCARD_TEMPLATE
+from tests import EVENT_TEMPLATE, TASK_TEMPLATE, VCARD_TEMPLATE, \
+    assert_item_equals

 import vdirsyncer.exceptions as exceptions
 from vdirsyncer.storage.base import Item
@ -48,6 +49,16 @@ class DavStorageTests(ServerMixin, StorageTests):

        assert list(s.get_multi([])) == []

+    def test_dav_unicode_href(self, s, get_item, monkeypatch):
+        if self.dav_server != 'radicale':
+            # Radicale is unable to deal with unicode hrefs
+            monkeypatch.setattr(s, '_get_href',
+                                lambda item: item.ident + s.fileext)
+        item = get_item(uid=u'lolätvdirsynceröü град сатану')
+        href, etag = s.upload(item)
+        item2, etag2 = s.get(href)
+        assert_item_equals(item, item2)
+

 class TestCaldavStorage(DavStorageTests):
    storage_class = CaldavStorage
--- a/vdirsyncer/storage/dav.py
+++ b/vdirsyncer/storage/dav.py
@ -11,7 +11,7 @@ from .base import Item, Storage
 from .http import HTTP_STORAGE_PARAMETERS, USERAGENT, prepare_auth, \
    prepare_client_cert, prepare_verify
 from .. import exceptions, log, utils
-from ..utils.compat import text_type
+from ..utils.compat import text_type, to_native


 dav_logger = log.get(__name__)
@ -22,6 +22,8 @@ CALDAV_DT_FORMAT = '%Y%m%dT%H%M%SZ'
 def _normalize_href(base, href):
    '''Normalize the href to be a path only relative to hostname and
    schema.'''
+    base = to_native(base, 'utf-8')
+    href = to_native(href, 'utf-8')
    if not href:
        raise ValueError(href)
    x = utils.compat.urlparse.urljoin(base, href)
@ -363,9 +365,7 @@ class DavStorage(Storage):
        return _normalize_href(self.session.url, *args, **kwargs)

    def _get_href(self, item):
-        href = item.ident
-        for char in self.unsafe_href_chars:
-            href = href.replace(char, '_')
+        href = utils.generate_href(item.ident, unsafe=self.unsafe_href_chars)
        return self._normalize_href(href + self.fileext)

    def _is_item_mimetype(self, mimetype):
@ -440,7 +440,7 @@ class DavStorage(Storage):
            headers=headers
        )
        etag = response.headers.get('etag', None)
-        href = self._normalize_href(_decode_url(response.url))
+        href = _decode_url(self._normalize_href(response.url))
        if not etag:
            dav_logger.warning('Race condition detected with server {}, '
                               'consider using an alternative.'
--- a/vdirsyncer/storage/filesystem.py
+++ b/vdirsyncer/storage/filesystem.py
@ -3,13 +3,12 @@
 import errno
 import os
 import subprocess
-import uuid

 from atomicwrites import atomic_write

 from .base import Item, Storage
 from .. import exceptions, log
-from ..utils import checkdir, expand_path, get_etag_from_file, \
+from ..utils import checkdir, expand_path, generate_href, get_etag_from_file, \
    get_etag_from_fileobject
 from ..utils.compat import text_type

@ -81,13 +80,10 @@ class FilesystemStorage(Storage):
    def _get_filepath(self, href):
        return os.path.join(self.path, href)

-    def _deterministic_href(self, item):
+    def _get_href(self, ident):
        # XXX: POSIX only defines / and \0 as invalid chars, but we should make
        # this work crossplatform.
-        return item.ident.replace('/', '_') + self.fileext
-
-    def _random_href(self):
-        return str(uuid.uuid4()) + self.fileext
+        return generate_href(ident, '/') + self.fileext

    def list(self):
        for fname in os.listdir(self.path):
@ -112,7 +108,7 @@ class FilesystemStorage(Storage):
            raise TypeError('item.raw must be a unicode string.')

        try:
-            href = self._deterministic_href(item)
+            href = self._get_href(item.ident)
            fpath, etag = self._upload_impl(item, href)
        except OSError as e:
            if e.errno in (
@ -121,7 +117,8 @@ class FilesystemStorage(Storage):
            ):
                logger.debug('UID as filename rejected, trying with random '
                             'one.')
-                href = self._random_href()
+                # random href instead of UID-based
+                href = self._get_href(None)
                fpath, etag = self._upload_impl(item, href)
            else:
                raise
--- a/vdirsyncer/utils/init.py
+++ b/vdirsyncer/utils/init.py
@ -2,8 +2,9 @@

 import os
 import sys
+import uuid

-from .compat import iteritems
+from .compat import iteritems, to_unicode
 from .. import exceptions


@ -156,3 +157,12 @@ class cached_property(object):
            return self
        obj.__dict__[self.__name__] = result = self.fget(obj)
        return result
+
+
+def generate_href(ident=None, unsafe=''):
+    if ident and \
+       ident.encode('ascii', 'ignore').decode('ascii') == ident:
+        for char in unsafe:
+            ident = ident.replace(char, '_')
+        return ident
+    return to_unicode(uuid.uuid4().hex)
--- a/vdirsyncer/utils/compat.py
+++ b/vdirsyncer/utils/compat.py
@ -10,14 +10,34 @@ if sys.version_info < (3, 3) and sys.version_info[:2] != (2, 7):
    )


+def to_unicode(x, encoding='ascii'):
+    if not isinstance(x, text_type):
+        x = x.decode(encoding)
+    return x
+
+
+def to_bytes(x, encoding='ascii'):
+    if not isinstance(x, bytes):
+        x = x.encode(encoding)
+    return x
+
+
 if PY2:  # pragma: no cover
    import urlparse
-    from urllib import \
-         quote as urlquote, \
-         unquote as urlunquote
+    import urllib as _urllib
+
+    # Horrible hack to make urllib play nice with u'...' urls from requests
+    def urlquote(x, *a, **kw):
+        return _urllib.quote(to_native(x, 'utf-8'), *a, **kw)
+
+    def urlunquote(x, *a, **kw):
+        return _urllib.unquote(to_native(x, 'utf-8'), *a, **kw)
+
    text_type = unicode  # flake8: noqa
    iteritems = lambda x: x.iteritems()
    itervalues = lambda x: x.itervalues()
+    to_native = to_bytes
+
 else:  # pragma: no cover
    import urllib.parse as urlparse
    urlquote = urlparse.quote
@ -25,6 +45,7 @@ else:  # pragma: no cover
    text_type = str
    iteritems = lambda x: x.items()
    itervalues = lambda x: x.values()
+    to_native = to_unicode


 def with_metaclass(meta, *bases):