Support unicode collections

- DAV: Avoid re-coding if possible
- Filesystem: Convert to native strings because that's what `os.path`
  utilities expect.
This commit is contained in:
Markus Unterwaditzer 2015-06-13 14:35:56 +02:00
parent 3cb6f3389d
commit 2866bbde5f
3 changed files with 45 additions and 26 deletions

View file

@ -6,7 +6,7 @@ import pytest
import vdirsyncer.exceptions as exceptions import vdirsyncer.exceptions as exceptions
from vdirsyncer.storage.base import Item from vdirsyncer.storage.base import Item
from vdirsyncer.utils.compat import iteritems, text_type from vdirsyncer.utils.compat import PY2, iteritems, text_type
from .. import EVENT_TEMPLATE, TASK_TEMPLATE, VCARD_TEMPLATE, \ from .. import EVENT_TEMPLATE, TASK_TEMPLATE, VCARD_TEMPLATE, \
assert_item_equals assert_item_equals
@ -215,3 +215,15 @@ class StorageTests(object):
items = list(href for href, etag in s.list()) items = list(href for href, etag in s.list())
assert len(items) == 2 assert len(items) == 2
assert len(set(items)) == 2 assert len(set(items)) == 2
@pytest.mark.parametrize('collname', [
'test@foo',
'testätfoo',
])
def test_specialchar_collection(self, requires_collections,
get_storage_args, get_item, collname):
if getattr(self, 'dav_server', '') == 'radicale' and PY2:
pytest.skip('Radicale is broken on Python 2.')
s = self.storage_class(**get_storage_args(collection=collname))
href, etag = s.upload(get_item())
s.get(href)

View file

@ -22,23 +22,23 @@ CALDAV_DT_FORMAT = '%Y%m%dT%H%M%SZ'
def _normalize_href(base, href): def _normalize_href(base, href):
'''Normalize the href to be a path only relative to hostname and '''Normalize the href to be a path only relative to hostname and
schema.''' schema.'''
orig_href = href
base = to_native(base, 'utf-8') base = to_native(base, 'utf-8')
href = to_native(href, 'utf-8') href = to_native(href, 'utf-8')
if not href: if not href:
raise ValueError(href) raise ValueError(href)
x = utils.compat.urlparse.urljoin(base, href) x = utils.compat.urlparse.urljoin(base, href)
x = utils.compat.urlparse.urlsplit(x).path x = utils.compat.urlparse.urlsplit(x).path
x = utils.compat.urlunquote(x)
x = utils.compat.urlquote(x, '/@%')
dav_logger.debug('Normalized URL from {!r} to {!r}'.format(orig_href, x))
return x return x
def _encode_url(x):
return utils.compat.urlquote(x, '/@')
def _decode_url(x):
return utils.compat.urlunquote(x)
class InvalidXMLResponse(exceptions.InvalidResponse): class InvalidXMLResponse(exceptions.InvalidResponse):
pass pass
@ -80,7 +80,7 @@ def _fuzzy_matches_mimetype(strict, weak):
def _get_collection_from_url(url): def _get_collection_from_url(url):
_, collection = url.rstrip('/').rsplit('/', 1) _, collection = url.rstrip('/').rsplit('/', 1)
return collection return utils.compat.urlunquote(collection)
class Discover(object): class Discover(object):
@ -204,7 +204,10 @@ class Discover(object):
return c return c
home = self.find_home() home = self.find_home()
url = '{}/{}'.format(home.rstrip('/'), collection) url = utils.compat.urlparse.urljoin(
home,
utils.compat.urlquote(collection, '/@')
)
try: try:
url = self._create_collection_impl(url) url = self._create_collection_impl(url)
@ -382,7 +385,7 @@ class DavStorage(Storage):
for href in hrefs: for href in hrefs:
if href != self._normalize_href(href): if href != self._normalize_href(href):
raise exceptions.NotFoundError(href) raise exceptions.NotFoundError(href)
href_xml.append('<D:href>{}</D:href>'.format(_encode_url(href))) href_xml.append('<D:href>{}</D:href>'.format(href))
if not href_xml: if not href_xml:
return () return ()
@ -435,12 +438,12 @@ class DavStorage(Storage):
response = self.session.request( response = self.session.request(
'PUT', 'PUT',
_encode_url(href), href,
data=item.raw.encode('utf-8'), data=item.raw.encode('utf-8'),
headers=headers headers=headers
) )
etag = response.headers.get('etag', None) etag = response.headers.get('etag', None)
href = _decode_url(self._normalize_href(response.url)) href = self._normalize_href(response.url)
if not etag: if not etag:
dav_logger.warning('Race condition detected with server {}, ' dav_logger.warning('Race condition detected with server {}, '
'consider using an alternative.' 'consider using an alternative.'
@ -468,11 +471,11 @@ class DavStorage(Storage):
self.session.request( self.session.request(
'DELETE', 'DELETE',
_encode_url(href), href,
headers=headers headers=headers
) )
def _parse_prop_responses(self, root, decoding_rounds=1): def _parse_prop_responses(self, root):
hrefs = set() hrefs = set()
for response in root.iter('{DAV:}response'): for response in root.iter('{DAV:}response'):
href = response.find('{DAV:}href') href = response.find('{DAV:}href')
@ -481,8 +484,6 @@ class DavStorage(Storage):
continue continue
href = self._normalize_href(href.text) href = self._normalize_href(href.text)
for i in range(decoding_rounds):
href = _decode_url(href)
if href in hrefs: if href in hrefs:
# Servers that send duplicate hrefs: # Servers that send duplicate hrefs:
@ -544,9 +545,9 @@ class DavStorage(Storage):
# Decode twice because ownCloud encodes twice. # Decode twice because ownCloud encodes twice.
# See https://github.com/owncloud/contacts/issues/581 # See https://github.com/owncloud/contacts/issues/581
rv = self._parse_prop_responses(root, decoding_rounds=2) rv = self._parse_prop_responses(root)
for href, etag, prop in rv: for href, etag, prop in rv:
yield href, etag yield utils.compat.urlunquote(href), etag
class CaldavStorage(DavStorage): class CaldavStorage(DavStorage):

View file

@ -10,7 +10,7 @@ from .base import Item, Storage
from .. import exceptions, log from .. import exceptions, log
from ..utils import checkdir, expand_path, generate_href, get_etag_from_file, \ from ..utils import checkdir, expand_path, generate_href, get_etag_from_file, \
get_etag_from_fileobject get_etag_from_fileobject
from ..utils.compat import text_type from ..utils.compat import text_type, to_native
logger = log.get(__name__) logger = log.get(__name__)
@ -30,7 +30,7 @@ class FilesystemStorage(Storage):
href, so if you change the file extension after a sync, this will href, so if you change the file extension after a sync, this will
trigger a re-download of everything (but *should* not cause data-loss trigger a re-download of everything (but *should* not cause data-loss
of any kind). of any kind).
:param encoding: File encoding for items. :param encoding: File encoding for items, both content and filename.
:param post_hook: A command to call for each item creation and :param post_hook: A command to call for each item creation and
modification. The command will be called with the path of the modification. The command will be called with the path of the
new/updated file. new/updated file.
@ -42,7 +42,7 @@ class FilesystemStorage(Storage):
def __init__(self, path, fileext, encoding='utf-8', post_hook=None, def __init__(self, path, fileext, encoding='utf-8', post_hook=None,
**kwargs): **kwargs):
super(FilesystemStorage, self).__init__(**kwargs) super(FilesystemStorage, self).__init__(**kwargs)
path = expand_path(path) path = expand_path(to_native(path, encoding))
checkdir(path, create=False) checkdir(path, create=False)
self.path = path self.path = path
self.encoding = encoding self.encoding = encoding
@ -70,15 +70,21 @@ class FilesystemStorage(Storage):
@classmethod @classmethod
def create_collection(cls, collection, **kwargs): def create_collection(cls, collection, **kwargs):
kwargs = dict(kwargs) kwargs = dict(kwargs)
encoding = kwargs.get('encoding', 'utf-8')
path = to_native(kwargs['path'], encoding)
if collection is not None: if collection is not None:
kwargs['path'] = os.path.join(kwargs['path'], collection) collection = to_native(collection, encoding)
checkdir(expand_path(kwargs['path']), create=True) path = os.path.join(path, collection)
checkdir(expand_path(path), create=True)
kwargs['path'] = path
kwargs['collection'] = collection kwargs['collection'] = collection
return kwargs return kwargs
def _get_filepath(self, href): def _get_filepath(self, href):
return os.path.join(self.path, href) return os.path.join(self.path, to_native(href, self.encoding))
def _get_href(self, ident): def _get_href(self, ident):
# XXX: POSIX only defines / and \0 as invalid chars, but we should make # XXX: POSIX only defines / and \0 as invalid chars, but we should make