Fix unicode URLs

This commit is contained in:
Markus Unterwaditzer 2015-06-10 18:06:39 +02:00
parent a07c9bea44
commit 263a45e2a5
5 changed files with 58 additions and 19 deletions

View file

@ -9,7 +9,8 @@ import pytest
import requests
import requests.exceptions
from tests import EVENT_TEMPLATE, TASK_TEMPLATE, VCARD_TEMPLATE
from tests import EVENT_TEMPLATE, TASK_TEMPLATE, VCARD_TEMPLATE, \
assert_item_equals
import vdirsyncer.exceptions as exceptions
from vdirsyncer.storage.base import Item
@ -48,6 +49,16 @@ class DavStorageTests(ServerMixin, StorageTests):
assert list(s.get_multi([])) == []
def test_dav_unicode_href(self, s, get_item, monkeypatch):
if self.dav_server != 'radicale':
# Radicale is unable to deal with unicode hrefs
monkeypatch.setattr(s, '_get_href',
lambda item: item.ident + s.fileext)
item = get_item(uid=u'lolätvdirsynceröü град сатану')
href, etag = s.upload(item)
item2, etag2 = s.get(href)
assert_item_equals(item, item2)
class TestCaldavStorage(DavStorageTests):
storage_class = CaldavStorage

View file

@ -11,7 +11,7 @@ from .base import Item, Storage
from .http import HTTP_STORAGE_PARAMETERS, USERAGENT, prepare_auth, \
prepare_client_cert, prepare_verify
from .. import exceptions, log, utils
from ..utils.compat import text_type
from ..utils.compat import text_type, to_native
dav_logger = log.get(__name__)
@ -22,6 +22,8 @@ CALDAV_DT_FORMAT = '%Y%m%dT%H%M%SZ'
def _normalize_href(base, href):
'''Normalize the href to be a path only relative to hostname and
schema.'''
base = to_native(base, 'utf-8')
href = to_native(href, 'utf-8')
if not href:
raise ValueError(href)
x = utils.compat.urlparse.urljoin(base, href)
@ -363,9 +365,7 @@ class DavStorage(Storage):
return _normalize_href(self.session.url, *args, **kwargs)
def _get_href(self, item):
href = item.ident
for char in self.unsafe_href_chars:
href = href.replace(char, '_')
href = utils.generate_href(item.ident, unsafe=self.unsafe_href_chars)
return self._normalize_href(href + self.fileext)
def _is_item_mimetype(self, mimetype):
@ -440,7 +440,7 @@ class DavStorage(Storage):
headers=headers
)
etag = response.headers.get('etag', None)
href = self._normalize_href(_decode_url(response.url))
href = _decode_url(self._normalize_href(response.url))
if not etag:
dav_logger.warning('Race condition detected with server {}, '
'consider using an alternative.'

View file

@ -3,13 +3,12 @@
import errno
import os
import subprocess
import uuid
from atomicwrites import atomic_write
from .base import Item, Storage
from .. import exceptions, log
from ..utils import checkdir, expand_path, get_etag_from_file, \
from ..utils import checkdir, expand_path, generate_href, get_etag_from_file, \
get_etag_from_fileobject
from ..utils.compat import text_type
@ -81,13 +80,10 @@ class FilesystemStorage(Storage):
def _get_filepath(self, href):
return os.path.join(self.path, href)
def _deterministic_href(self, item):
def _get_href(self, ident):
# XXX: POSIX only defines / and \0 as invalid chars, but we should make
# this work crossplatform.
return item.ident.replace('/', '_') + self.fileext
def _random_href(self):
return str(uuid.uuid4()) + self.fileext
return generate_href(ident, '/') + self.fileext
def list(self):
for fname in os.listdir(self.path):
@ -112,7 +108,7 @@ class FilesystemStorage(Storage):
raise TypeError('item.raw must be a unicode string.')
try:
href = self._deterministic_href(item)
href = self._get_href(item.ident)
fpath, etag = self._upload_impl(item, href)
except OSError as e:
if e.errno in (
@ -121,7 +117,8 @@ class FilesystemStorage(Storage):
):
logger.debug('UID as filename rejected, trying with random '
'one.')
href = self._random_href()
# random href instead of UID-based
href = self._get_href(None)
fpath, etag = self._upload_impl(item, href)
else:
raise

View file

@ -2,8 +2,9 @@
import os
import sys
import uuid
from .compat import iteritems
from .compat import iteritems, to_unicode
from .. import exceptions
@ -156,3 +157,12 @@ class cached_property(object):
return self
obj.__dict__[self.__name__] = result = self.fget(obj)
return result
def generate_href(ident=None, unsafe=''):
if ident and \
ident.encode('ascii', 'ignore').decode('ascii') == ident:
for char in unsafe:
ident = ident.replace(char, '_')
return ident
return to_unicode(uuid.uuid4().hex)

View file

@ -10,14 +10,34 @@ if sys.version_info < (3, 3) and sys.version_info[:2] != (2, 7):
)
def to_unicode(x, encoding='ascii'):
if not isinstance(x, text_type):
x = x.decode(encoding)
return x
def to_bytes(x, encoding='ascii'):
if not isinstance(x, bytes):
x = x.encode(encoding)
return x
if PY2: # pragma: no cover
import urlparse
from urllib import \
quote as urlquote, \
unquote as urlunquote
import urllib as _urllib
# Horrible hack to make urllib play nice with u'...' urls from requests
def urlquote(x, *a, **kw):
return _urllib.quote(to_native(x, 'utf-8'), *a, **kw)
def urlunquote(x, *a, **kw):
return _urllib.unquote(to_native(x, 'utf-8'), *a, **kw)
text_type = unicode # flake8: noqa
iteritems = lambda x: x.iteritems()
itervalues = lambda x: x.itervalues()
to_native = to_bytes
else: # pragma: no cover
import urllib.parse as urlparse
urlquote = urlparse.quote
@ -25,6 +45,7 @@ else: # pragma: no cover
text_type = str
iteritems = lambda x: x.items()
itervalues = lambda x: x.values()
to_native = to_unicode
def with_metaclass(meta, *bases):