From 06a701bc10dac16ff0ff304eb7cb9f502b71cf95 Mon Sep 17 00:00:00 2001 From: Markus Unterwaditzer Date: Sat, 13 Dec 2014 18:20:13 +0100 Subject: [PATCH 1/3] Handle collections correctly Fix #132 Passing the collections parameter used to mean that the storage should append its value to the URL or path. This was a leaky abstraction for the reasons explained in #132. The new behavior removes this meaning from this parameter. Vdirsyncer now maintains a cache of discovered collections. --- tests/storage/__init__.py | 20 +- .../storage/dav/servers/radicale/__init__.py | 1 + tests/storage/test_filesystem.py | 8 +- tests/test_cli.py | 137 +++++++- tests/utils/test_main.py | 3 +- vdirsyncer/cli.py | 294 ++++++++++++------ vdirsyncer/storage/base.py | 33 +- vdirsyncer/storage/dav.py | 13 +- vdirsyncer/storage/filesystem.py | 16 +- 9 files changed, 376 insertions(+), 149 deletions(-) diff --git a/tests/storage/__init__.py b/tests/storage/__init__.py index c1ae61a..8454346 100644 --- a/tests/storage/__init__.py +++ b/tests/storage/__init__.py @@ -148,22 +148,30 @@ class SupportsCollections(object): def test_discover(self, get_storage_args, get_item): expected = set() + items = {} for i in range(1, 5): # Create collections, but use the "collection" attribute because # Radicale requires file extensions in their names. 
- expected.add( - self.storage_class( - **get_storage_args(collection='test{}'.format(i)) - ).collection - ) + collection = 'test{}'.format(i) + s = self.storage_class(**get_storage_args(collection=collection)) + items[s.collection] = [s.upload(get_item())] + expected.add(s.collection) d = self.storage_class.discover( **get_storage_args(collection=None)) - actual = set(s.collection for s in d) + actual = set(args['collection'] for args in d) assert actual >= expected + for storage_args in d: + collection = storage_args['collection'] + if collection not in expected: + continue + s = self.storage_class(**storage_args) + rv = list(s.list()) + assert rv == items[collection] + def test_discover_collection_arg(self, get_storage_args): args = get_storage_args(collection='test2') with pytest.raises(TypeError) as excinfo: diff --git a/tests/storage/dav/servers/radicale/__init__.py b/tests/storage/dav/servers/radicale/__init__.py index c6910be..f2eeaa7 100644 --- a/tests/storage/dav/servers/radicale/__init__.py +++ b/tests/storage/dav/servers/radicale/__init__.py @@ -110,6 +110,7 @@ class ServerMixin(object): url = 'http://127.0.0.1/bob/' if collection is not None: collection += self.storage_class.fileext + url = url.rstrip('/') + '/' + collection rv = {'url': url, 'username': 'bob', 'password': 'bob', 'collection': collection, 'unsafe_href_chars': ''} diff --git a/tests/storage/test_filesystem.py b/tests/storage/test_filesystem.py index f1cf83f..1a4bb71 100644 --- a/tests/storage/test_filesystem.py +++ b/tests/storage/test_filesystem.py @@ -31,22 +31,22 @@ class TestFilesystemStorage(StorageTests): path = self.tmpdir if collection is not None: os.makedirs(os.path.join(path, collection)) + path = os.path.join(path, collection) return {'path': path, 'fileext': '.txt', 'collection': collection} return inner def test_create_is_false(self, tmpdir): with pytest.raises(IOError): - self.storage_class(str(tmpdir), '.txt', collection='lol', - create=False) + 
self.storage_class(str(tmpdir) + '/lol', '.txt', create=False) def test_is_not_directory(self, tmpdir): with pytest.raises(IOError): f = tmpdir.join('hue') f.write('stub') - self.storage_class(str(tmpdir), '.txt', collection='hue') + self.storage_class(str(tmpdir) + '/hue', '.txt') def test_create_is_true(self, tmpdir): - self.storage_class(str(tmpdir), '.txt', collection='asd') + self.storage_class(str(tmpdir) + '/asd', '.txt') assert tmpdir.listdir() == [tmpdir.join('asd')] def test_broken_data(self, tmpdir): diff --git a/tests/test_cli.py b/tests/test_cli.py index 79810d4..68e4eb2 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -79,11 +79,9 @@ def test_parse_pairs_args(): assert sorted( cli.parse_pairs_args(['foo/foocoll', 'one', 'eins'], pairs) ) == [ - ('eins', None), - ('foo', 'foocoll'), - ('one', 'a'), - ('one', 'b'), - ('one', 'c') + ('eins', set()), + ('foo', {'foocoll'}), + ('one', set()), ] @@ -111,21 +109,12 @@ def test_simple_run(tmpdir): runner = CliRunner(env={'VDIRSYNCER_CONFIG': str(config_file)}) result = runner.invoke(cli.app, ['sync']) assert not result.exception - assert result.output.lower().strip() == 'syncing my_pair' tmpdir.join('path_a/haha.txt').write('UID:haha') result = runner.invoke(cli.app, ['sync']) - assert result.output == ('Syncing my_pair\n' - 'Copying (uploading) item haha to my_b\n') + assert 'Copying (uploading) item haha to my_b' in result.output assert tmpdir.join('path_b/haha.txt').read() == 'UID:haha' - result = runner.invoke(cli.app, ['sync', 'my_pair', 'my_pair']) - assert set(result.output.splitlines()) == set([ - 'Syncing my_pair', - 'warning: Already prepared my_pair, skipping' - ]) - assert not result.exception - def test_empty_storage(tmpdir): config_file = tmpdir.join('config') @@ -151,7 +140,6 @@ def test_empty_storage(tmpdir): runner = CliRunner(env={'VDIRSYNCER_CONFIG': str(config_file)}) result = runner.invoke(cli.app, ['sync']) assert not result.exception - assert result.output.lower().strip() == 
'syncing my_pair' tmpdir.join('path_a/haha.txt').write('UID:haha') result = runner.invoke(cli.app, ['sync']) @@ -244,3 +232,120 @@ def test_invalid_storage_name(): cli.load_config(f) assert 'invalid characters' in str(excinfo.value).lower() + + +def test_deprecated_item_status(tmpdir): + f = tmpdir.join('mypair.items') + f.write(dedent(''' + ["ident", ["href_a", "etag_a", "href_b", "etag_b"]] + ["ident_two", ["href_a", "etag_a", "href_b", "etag_b"]] + ''').strip()) + + data = { + 'ident': ['href_a', 'etag_a', 'href_b', 'etag_b'], + 'ident_two': ['href_a', 'etag_a', 'href_b', 'etag_b'] + } + + assert cli.load_status(str(tmpdir), 'mypair', data_type='items') == data + + cli.save_status(str(tmpdir), 'mypair', data_type='items', data=data) + assert cli.load_status(str(tmpdir), 'mypair', data_type='items') == data + + +def test_collections_cache_invalidation(tmpdir): + cfg = tmpdir.join('config') + cfg.write(dedent(''' + [general] + status_path = {0}/status/ + + [storage foo] + type = filesystem + path = {0}/foo/ + fileext = .txt + + [storage bar] + type = filesystem + path = {0}/bar/ + fileext = .txt + + [pair foobar] + a = foo + b = bar + collections = a, b, c + ''').format(str(tmpdir))) + + foo = tmpdir.mkdir('foo') + bar = tmpdir.mkdir('bar') + foo.mkdir('a').join('itemone.txt').write('UID:itemone') + + runner = CliRunner() + result = runner.invoke(cli.app, ['sync'], + env={'VDIRSYNCER_CONFIG': str(cfg)}) + assert not result.exception + + rv = bar.join('a').listdir() + assert len(rv) == 1 + assert rv[0].basename == 'itemone.txt' + + cfg.write(dedent(''' + [general] + status_path = {0}/status/ + + [storage foo] + type = filesystem + path = {0}/foo/ + fileext = .txt + + [storage bar] + type = filesystem + path = {0}/bar2/ + fileext = .txt + + [pair foobar] + a = foo + b = bar + collections = a, b, c + ''').format(str(tmpdir))) + + tmpdir.join('status').remove() + bar2 = tmpdir.mkdir('bar2') + result = runner.invoke(cli.app, ['sync'], + env={'VDIRSYNCER_CONFIG': 
str(cfg)}) + assert not result.exception + + rv = bar.join('a').listdir() + rv2 = bar2.join('a').listdir() + assert len(rv) == len(rv2) == 1 + assert rv[0].basename == rv2[0].basename == 'itemone.txt' + + +def test_invalid_pairs_as_cli_arg(tmpdir): + cfg = tmpdir.join('config') + cfg.write(dedent(''' + [general] + status_path = {0}/status/ + + [storage foo] + type = filesystem + path = {0}/foo/ + fileext = .txt + + [storage bar] + type = filesystem + path = {0}/bar/ + fileext = .txt + + [pair foobar] + a = foo + b = bar + collections = a, b, c + ''').format(str(tmpdir))) + + tmpdir.mkdir('foo') + tmpdir.mkdir('bar') + + runner = CliRunner() + result = runner.invoke(cli.app, ['sync', 'foobar/d'], + env={'VDIRSYNCER_CONFIG': str(cfg)}) + assert result.exception + assert 'pair foobar: collection d not found' in result.output.lower() diff --git a/tests/utils/test_main.py b/tests/utils/test_main.py index f33f2aa..5b7f0d0 100644 --- a/tests/utils/test_main.py +++ b/tests/utils/test_main.py @@ -246,7 +246,8 @@ def test_get_class_init_args_on_storage(): from vdirsyncer.storage.memory import MemoryStorage all, required = utils.get_class_init_args(MemoryStorage) - assert all == set(['fileext', 'collection', 'read_only', 'instance_name']) + assert all == set(['fileext', 'collection', 'read_only', 'instance_name', + 'collection_human']) assert not required diff --git a/vdirsyncer/cli.py b/vdirsyncer/cli.py index 2979a90..029ed47 100644 --- a/vdirsyncer/cli.py +++ b/vdirsyncer/cli.py @@ -9,6 +9,7 @@ import errno import functools +import hashlib import json import os import string @@ -78,6 +79,115 @@ def get_status_name(pair, collection): return pair + '/' + collection +def get_collections_cache_key(pair_options, config_a, config_b): + m = hashlib.sha256() + j = json.dumps([pair_options, config_a, config_b], sort_keys=True) + m.update(j.encode('utf-8')) + return m.hexdigest() + + +def collections_for_pair(status_path, name_a, name_b, pair_name, config_a, + config_b, 
pair_options, skip_cache=False): + '''Determine all configured collections for a given pair. Takes care of + shortcut expansion and result caching. + + :param status_path: The path to the status directory. + :param name_a: The config name of storage A. + :param name_b: The config name of storage B. + :param pair_name: The config name of the pair. + :param config_a: The configuration for storage A, with pair-defined + defaults. + :param config_b: The configuration for storage B, with pair-defined + defaults. + :param pair_options: Pair-specific options. + :param skip_cache: Whether to skip the cached data and always do discovery. + Even with this option enabled, the new cache is written. + + :returns: iterable of (collection, (a_args, b_args)) + ''' + rv = load_status(status_path, pair_name, data_type='collections') + cache_key = get_collections_cache_key(pair_options, config_a, config_b) + if rv and not skip_cache: + if rv.get('cache_key', None) == cache_key: + return rv.get('collections', rv) + elif rv: + cli_logger.info('Detected change in config file, discovering ' + 'collections for {}'.format(pair_name)) + + cli_logger.info('Discovering collections for pair {}' + .format(pair_name)) + + # We have to use a list here because the special None/null value would get + # mangled to string (because JSON objects always have string keys). 
+ rv = list(_collections_for_pair_impl(status_path, name_a, name_b, + pair_name, config_a, config_b, + pair_options)) + save_status(status_path, pair_name, data_type='collections', + data={'collections': rv, 'cache_key': cache_key}) + return rv + + +def _collections_for_pair_impl(status_path, name_a, name_b, pair_name, + config_a, config_b, pair_options): + + def _discover_from_config(config): + storage_type = config['type'] + cls, config = storage_class_from_config(config) + + try: + discovered = list(cls.discover(**config)) + except Exception: + return handle_storage_init_error(cls, config) + else: + rv = {} + for args in discovered: + args['type'] = storage_type + rv[args['collection']] = args + return rv + + def _get_coll(discovered, collection, storage_name, config): + try: + return discovered[collection] + except KeyError: + storage_type = config['type'] + cls, config = storage_class_from_config(config) + try: + rv = cls.join_collection(collection=collection, **config) + rv['type'] = storage_type + return rv + except NotImplementedError: + raise CliError( + 'Unable to find collection {collection!r} for storage ' + '{storage_name!r}.\n For pair {pair_name!r}, you wanted ' + 'to use the collections {shortcut!r}, which yielded ' + '{collection!r} (amongst others). 
Vdirsyncer was unable ' + 'to find an equivalent collection for the other ' + 'storage.'.format( + collection=collection, shortcut=shortcut, + storage_name=storage_name, pair_name=pair_name + ) + ) + + shortcuts = set(_parse_old_config_list_value(pair_options, 'collections')) + if not shortcuts: + yield None, (config_a, config_b) + else: + a_discovered = _discover_from_config(config_a) + b_discovered = _discover_from_config(config_b) + + for shortcut in shortcuts: + if shortcut in ('from a', 'from b'): + collections = (a_discovered if shortcut == 'from a' + else b_discovered) + else: + collections = [shortcut] + + for collection in collections: + a_args = _get_coll(a_discovered, collection, name_a, config_a) + b_args = _get_coll(b_discovered, collection, name_b, config_b) + yield collection, (a_args, b_args) + + def validate_general_section(general_config): if general_config is None: raise CliError( @@ -143,19 +253,43 @@ def load_config(f, pair_options=('collections', 'conflict_resolution')): return general, pairs, storages -def load_status(path, status_name): - full_path = expand_path(os.path.join(path, status_name)) - if not os.path.exists(full_path): - return {} - with open(full_path) as f: - return dict(json.loads(line) for line in f) +def load_status(base_path, pair, collection=None, data_type=None): + assert data_type is not None + status_name = get_status_name(pair, collection) + path = expand_path(os.path.join(base_path, status_name)) + if os.path.isfile(path) and data_type == 'items': + new_path = path + '.items' + cli_logger.warning('Migrating statuses: Renaming {} to {}' + .format(path, new_path)) + os.rename(path, new_path) + + path += '.' 
+ data_type + if not os.path.exists(path): + return None + + with open(path) as f: + try: + return dict(json.load(f)) + except ValueError: + pass + + f.seek(0) + try: + return dict(json.loads(line) for line in f) + except ValueError: + pass + + return {} -def save_status(path, status_name, status): - full_path = expand_path(os.path.join(path, status_name)) - base_path = os.path.dirname(full_path) +def save_status(base_path, pair, collection=None, data_type=None, data=None): + assert data_type is not None + assert data is not None + status_name = get_status_name(pair, collection) + path = expand_path(os.path.join(base_path, status_name)) + '.' + data_type + base_path = os.path.dirname(path) - if os.path.isfile(base_path): + if collection is not None and os.path.isfile(base_path): raise CliError('{} is probably a legacy file and could be removed ' 'automatically, but this choice is left to the ' 'user. If you think this is an error, please file ' @@ -167,10 +301,8 @@ def save_status(path, status_name, status): if e.errno != errno.EEXIST: raise - with safe_write(full_path, 'w+') as f: - for k, v in status.items(): - json.dump((k, v), f) - f.write('\n') + with safe_write(path, 'w+') as f: + json.dump(data, f) def storage_class_from_config(config): @@ -226,33 +358,24 @@ def handle_storage_init_error(cls, config): def parse_pairs_args(pairs_args, all_pairs): ''' Expand the various CLI shortforms ("pair, pair/collection") to an iterable - of (pair, collection). + of (pair, collections). 
''' - if not pairs_args: - pairs_args = list(all_pairs) - for pair_and_collection in pairs_args: + rv = {} + for pair_and_collection in (pairs_args or all_pairs): pair, collection = pair_and_collection, None if '/' in pair: pair, collection = pair.split('/') - try: - a_name, b_name, pair_options, storage_defaults = \ - all_pairs[pair] - except KeyError: + if pair not in all_pairs: raise CliError('Pair not found: {}\n' 'These are the pairs found: {}' .format(pair, list(all_pairs))) - if collection is None: - collections = set( - _parse_old_config_list_value(pair_options, 'collections') - or [None] - ) - else: - collections = [collection] + collections = rv.setdefault(pair, set()) + if collection: + collections.add(collection) - for c in collections: - yield pair, c + return rv.items() # We create the app inside a factory and destroy that factory after first use # to avoid pollution of the module namespace. @@ -337,12 +460,12 @@ def _create_app(): wq = WorkerQueue(max_workers) wq.handled_jobs = set() - for pair_name, collection in parse_pairs_args(pairs, all_pairs): + for pair_name, collections in parse_pairs_args(pairs, all_pairs): wq.spawn_worker() wq.put( - functools.partial(prepare_sync, pair_name=pair_name, - collection=collection, general=general, - all_pairs=all_pairs, + functools.partial(sync_pair, pair_name=pair_name, + collections_to_sync=collections, + general=general, all_pairs=all_pairs, all_storages=all_storages, force_delete=force_delete)) @@ -354,76 +477,40 @@ app = main = _create_app() del _create_app -def expand_collection(pair_name, collection, general, all_pairs, all_storages): - a_name, b_name, pair_options, storage_defaults = all_pairs[pair_name] - if collection in ('from a', 'from b'): - # from_name: name of the storage which should be used for discovery - # other_name: the other storage's name - if collection == 'from a': - from_name, other_name = a_name, b_name - else: - from_name, other_name = b_name, a_name - - cli_logger.info('Syncing {}: 
Discovering collections from {}' - .format(pair_name, from_name)) - - config = dict(storage_defaults) - config.update(all_storages[from_name]) - cls, config = storage_class_from_config(config) - try: - storages = list(cls.discover(**config)) - except Exception: - handle_storage_init_error(cls, config) - - for storage in storages: - config = dict(storage_defaults) - config.update(all_storages[other_name]) - config['collection'] = actual_collection = storage.collection - other_storage = storage_instance_from_config(config) - - if collection == 'from a': - a, b = storage, other_storage - else: - b, a = storage, other_storage - - yield actual_collection, a, b - else: - config = dict(storage_defaults) - config.update(all_storages[a_name]) - config['collection'] = collection - a = storage_instance_from_config(config) - - config = dict(storage_defaults) - config.update(all_storages[b_name]) - config['collection'] = collection - b = storage_instance_from_config(config) - - yield collection, a, b - - -def prepare_sync(wq, pair_name, collection, general, all_pairs, all_storages, - force_delete): - key = ('prepare', pair_name, collection) +def sync_pair(wq, pair_name, collections_to_sync, general, all_pairs, + all_storages, force_delete): + key = ('prepare', pair_name) if key in wq.handled_jobs: - status_name = get_status_name(pair_name, collection) - cli_logger.warning('Already prepared {}, skipping'.format(status_name)) + cli_logger.warning('Already prepared {}, skipping'.format(pair_name)) return wq.handled_jobs.add(key) a_name, b_name, pair_options, storage_defaults = all_pairs[pair_name] - jobs = list(expand_collection(pair_name, collection, general, all_pairs, - all_storages)) - for i in range(len(jobs) - 1): - # spawn one worker less because we can reuse the current one + all_collections = dict(collections_for_pair( + general['status_path'], a_name, b_name, pair_name, + all_storages[a_name], all_storages[b_name], pair_options + )) + + # spawn one worker less because we 
can reuse the current one + new_workers = -1 + for collection in (collections_to_sync or all_collections): + try: + config_a, config_b = all_collections[collection] + except KeyError: + raise CliError('Pair {}: Collection {} not found. These are the ' + 'configured collections:\n{}'.format( + pair_name, collection, list(all_collections))) + new_workers += 1 + wq.put(functools.partial( + sync_collection, pair_name=pair_name, collection=collection, + config_a=config_a, config_b=config_b, pair_options=pair_options, + general=general, force_delete=force_delete + )) + + for i in range(new_workers): wq.spawn_worker() - for collection, a, b in jobs: - wq.put(functools.partial(sync_collection, pair_name=pair_name, - collection=collection, a=a, b=b, - pair_options=pair_options, general=general, - force_delete=force_delete)) - def handle_cli_error(status_name='sync'): try: @@ -460,8 +547,8 @@ def handle_cli_error(status_name='sync'): return True -def sync_collection(wq, pair_name, collection, a, b, pair_options, general, - force_delete): +def sync_collection(wq, pair_name, collection, config_a, config_b, + pair_options, general, force_delete): status_name = get_status_name(pair_name, collection) key = ('sync', pair_name, collection) @@ -473,8 +560,12 @@ def sync_collection(wq, pair_name, collection, a, b, pair_options, general, try: cli_logger.info('Syncing {}'.format(status_name)) - status = load_status(general['status_path'], status_name) + status = load_status(general['status_path'], pair_name, + collection, data_type='items') or {} cli_logger.debug('Loaded status for {}'.format(status_name)) + + a = storage_instance_from_config(config_a) + b = storage_instance_from_config(config_b) sync( a, b, status, conflict_resolution=pair_options.get('conflict_resolution', None), @@ -484,7 +575,8 @@ def sync_collection(wq, pair_name, collection, a, b, pair_options, general, if not handle_cli_error(status_name): raise JobFailed() - save_status(general['status_path'], status_name, 
status) + save_status(general['status_path'], pair_name, collection, + data_type='items', data=status) class WorkerQueue(object): diff --git a/vdirsyncer/storage/base.py b/vdirsyncer/storage/base.py index 7a693de..48b0beb 100644 --- a/vdirsyncer/storage/base.py +++ b/vdirsyncer/storage/base.py @@ -30,10 +30,6 @@ class Storage(object): Strings can be either unicode strings or bytestrings. If bytestrings, an ASCII encoding is assumed. - :param collection: If None, the given URL or path is already directly - referring to a collection. Otherwise it will be treated as a basepath - to many collections (e.g. a vdir) and the given collection name will be - looked for. :param read_only: Whether the synchronization algorithm should avoid writes to this storage. Some storages accept no value other than ``True``. ''' @@ -44,10 +40,16 @@ class Storage(object): # overridden by subclasses. storage_name = None - # The string used in the config to denote a particular instance. Should be + # The string used in the config to denote a particular instance. Will be # overridden during instantiation. instance_name = None + # The machine-readable name of this collection. + collection = None + + # The human-readable name of this collection. + collection_human = None + # A value of True means the storage does not support write-methods such as # upload, update and delete. A value of False means the storage does # support those methods. @@ -56,7 +58,8 @@ class Storage(object): # The attribute values to show in the representation of the storage. 
_repr_attributes = () - def __init__(self, instance_name=None, read_only=None, collection=None): + def __init__(self, instance_name=None, read_only=None, collection=None, + collection_human=None): if read_only is None: read_only = self.read_only if self.read_only and not read_only: @@ -66,6 +69,7 @@ class Storage(object): instance_name = '{}/{}'.format(instance_name, collection) self.instance_name = instance_name self.collection = collection + self.collection_human = collection_human @classmethod def discover(cls, **kwargs): @@ -74,8 +78,21 @@ class Storage(object): :param **kwargs: Keyword arguments to additionally pass to the storage instances returned. You shouldn't pass `collection` here, otherwise TypeError will be raised. - :returns: Iterable of storages which represent the discovered - collections, all of which are passed kwargs during initialization. + :returns: iterable of ``storage_args``. + ``storage_args`` is a dictionary of ``**kwargs`` to pass to this + class to obtain a storage instance pointing to this collection. It + also must contain a ``"collection"`` key. That key's value is used + to match two collections together for synchronization. IOW it is a + machine-readable identifier for the collection, usually obtained + from the last segment of a URL or filesystem path. + + ''' + raise NotImplementedError() + + @classmethod + def join_collection(cls, collection, **kwargs): + '''Append the collection to the URL or path specified in ``**kwargs`` + and return the new arguments. 
''' raise NotImplementedError() diff --git a/vdirsyncer/storage/dav.py b/vdirsyncer/storage/dav.py index 528ed8f..00bc4f1 100644 --- a/vdirsyncer/storage/dav.py +++ b/vdirsyncer/storage/dav.py @@ -250,9 +250,6 @@ class DavStorage(Storage): super(DavStorage, self).__init__(**kwargs) url = url.rstrip('/') + '/' - collection = kwargs.get('collection') - if collection is not None: - url = utils.urlparse.urljoin(url, collection) self.session = DavSession(url, username, password, verify, auth, useragent, verify_fingerprint) self.unsafe_href_chars = unsafe_href_chars @@ -271,10 +268,12 @@ class DavStorage(Storage): )) d = cls.discovery_class(DavSession(url=url, **discover_args)) for c in d.discover(): - base, collection = c['href'].rstrip('/').rsplit('/', 1) - s = cls(url=base, collection=collection, **kwargs) - s.displayname = c['displayname'] - yield s + url = c['href'] + _, collection = url.rstrip('/').rsplit('/', 1) + storage_args = {'url': url, 'collection': collection, + 'collection_human': c['displayname']} + storage_args.update(kwargs) + yield storage_args def _normalize_href(self, *args, **kwargs): return _normalize_href(self.session.url, *args, **kwargs) diff --git a/vdirsyncer/storage/filesystem.py b/vdirsyncer/storage/filesystem.py index 69f4f2d..9dc28e5 100644 --- a/vdirsyncer/storage/filesystem.py +++ b/vdirsyncer/storage/filesystem.py @@ -40,9 +40,6 @@ class FilesystemStorage(Storage): def __init__(self, path, fileext, encoding='utf-8', create=True, **kwargs): super(FilesystemStorage, self).__init__(**kwargs) path = expand_path(path) - collection = kwargs.get('collection') - if collection is not None: - path = os.path.join(path, collection) checkdir(path, create=create) self.path = path self.encoding = encoding @@ -54,9 +51,16 @@ class FilesystemStorage(Storage): raise TypeError('collection argument must not be given.') path = expand_path(path) for collection in os.listdir(path): - if os.path.isdir(os.path.join(path, collection)): - s = cls(path=path, 
collection=collection, **kwargs) - yield s + collection_path = os.path.join(path, collection) + if os.path.isdir(collection_path): + args = dict(collection=collection, path=collection_path, + **kwargs) + yield args + + @classmethod + def join_collection(cls, collection, **kwargs): + kwargs['path'] = os.path.join(kwargs['path'], collection) + return kwargs def _get_filepath(self, href): return os.path.join(self.path, href) From 2e2349c46d46ed11418caec1a490c1883332e149 Mon Sep 17 00:00:00 2001 From: Markus Unterwaditzer Date: Tue, 16 Dec 2014 17:20:02 +0100 Subject: [PATCH 2/3] Add discover command --- tests/test_cli.py | 55 +++++++++++++++++++++++++++++++++++++++++++++++ vdirsyncer/cli.py | 46 +++++++++++++++++++++++++++++++++------ 2 files changed, 95 insertions(+), 6 deletions(-) diff --git a/tests/test_cli.py b/tests/test_cli.py index 68e4eb2..b65f8ac 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -349,3 +349,58 @@ def test_invalid_pairs_as_cli_arg(tmpdir): env={'VDIRSYNCER_CONFIG': str(cfg)}) assert result.exception assert 'pair foobar: collection d not found' in result.output.lower() + + +def test_discover_command(tmpdir): + cfg = tmpdir.join('config') + cfg.write(dedent(''' + [general] + status_path = {0}/status/ + + [storage foo] + type = filesystem + path = {0}/foo/ + fileext = .txt + + [storage bar] + type = filesystem + path = {0}/bar/ + fileext = .txt + + [pair foobar] + a = foo + b = bar + collections = from a + ''').format(str(tmpdir))) + + foo = tmpdir.mkdir('foo') + tmpdir.mkdir('bar') + + foo.mkdir('a') + foo.mkdir('b') + foo.mkdir('c') + + runner = CliRunner() + result = runner.invoke(cli.app, ['sync'], + env={'VDIRSYNCER_CONFIG': str(cfg)}) + assert not result.exception + lines = result.output.splitlines() + assert lines[0].startswith('Discovering') + assert 'Syncing foobar/a' in lines + assert 'Syncing foobar/b' in lines + assert 'Syncing foobar/c' in lines + + foo.mkdir('d') + result = runner.invoke(cli.app, ['sync'], + 
env={'VDIRSYNCER_CONFIG': str(cfg)}) + assert not result.exception + assert 'Syncing foobar/d' not in result.output + + result = runner.invoke(cli.app, ['discover'], + env={'VDIRSYNCER_CONFIG': str(cfg)}) + assert not result.exception + + result = runner.invoke(cli.app, ['sync'], + env={'VDIRSYNCER_CONFIG': str(cfg)}) + assert not result.exception + assert 'Syncing foobar/d' in result.output diff --git a/vdirsyncer/cli.py b/vdirsyncer/cli.py index 029ed47..d4ea850 100644 --- a/vdirsyncer/cli.py +++ b/vdirsyncer/cli.py @@ -432,21 +432,23 @@ def _create_app(): raise CliError('Error during reading config {}: {}' .format(fname, e)) + max_workers_option = click.option( + '--max-workers', default=0, type=click.IntRange(min=0, max=None), + help=('Use at most this many connections, 0 means unlimited.') + ) + @app.command() @click.argument('pairs', nargs=-1) @click.option('--force-delete/--no-force-delete', help=('Disable data-loss protection for the given pairs. ' 'Can be passed multiple times')) - @click.option('--max-workers', - default=0, type=click.IntRange(min=0, max=None), - help=('Use at most this many connections, 0 means ' - 'unlimited.')) + @max_workers_option @click.pass_context @catch_errors def sync(ctx, pairs, force_delete, max_workers): ''' - Synchronize the given pairs. If no pairs are given, all will be - synchronized. + Synchronize the given collections or pairs. If no arguments are given, + all will be synchronized. Examples: `vdirsyncer sync` will sync everything configured. @@ -471,6 +473,38 @@ def _create_app(): wq.join() + @app.command() + @click.argument('pairs', nargs=-1) + @max_workers_option + @click.pass_context + @catch_errors + def discover(ctx, pairs, max_workers): + ''' + Refresh collection cache for the given pairs. 
+ ''' + general, all_pairs, all_storages = ctx.obj['config'] + cli_logger.debug('Using {} maximal workers.'.format(max_workers)) + wq = WorkerQueue(max_workers) + + for pair in (pairs or all_pairs): + try: + name_a, name_b, pair_options, storage_defaults = \ + all_pairs[pair] + except KeyError: + raise CliError('Pair not found: {}\n' + 'These are the pairs found: {}' + .format(pair, list(all_pairs))) + + wq.spawn_worker() + wq.put(functools.partial( + (lambda wq, **kwargs: collections_for_pair(**kwargs)), + status_path=general['status_path'], name_a=name_a, + name_b=name_b, pair_name=pair, config_a=all_storages[name_a], + config_b=all_storages[name_b], pair_options=pair_options, + skip_cache=True)) + + wq.join() + return app app = main = _create_app() From e219139e0817848134e29dd701c009b4a48926d0 Mon Sep 17 00:00:00 2001 From: Markus Unterwaditzer Date: Tue, 16 Dec 2014 17:49:42 +0100 Subject: [PATCH 3/3] Add docs on collection discovery --- docs/tutorial.rst | 58 +++++++++++++++++++++++++++++++---------------- 1 file changed, 38 insertions(+), 20 deletions(-) diff --git a/docs/tutorial.rst b/docs/tutorial.rst index aae6a5f..4ac6bb5 100644 --- a/docs/tutorial.rst +++ b/docs/tutorial.rst @@ -91,26 +91,8 @@ further reference, it uses the storages More Configuration ================== -But what if we want to synchronize multiple addressbooks from the same server? -Of course we could create new pairs and storages for each addressbook, but that -is very tedious to do. Instead we will use a shortcut: - -- Remove the last segment from the URL, so that it ends with ``.../bob/`` - instead of ``.../bob/default/``. - -- Add the following line to the *pair* section:: - - [pair my_contacts] - ... - collections = ["default", "work"] - -This will synchronize -``https://owncloud.example.com/remote.php/carddav/addressbooks/bob/default/`` -with ``~/.contacts/default/`` and -``https://owncloud.example.com/remote.php/carddav/addressbooks/bob/work/`` with -``~/.contacts/work/``. 
Under the hood, vdirsyncer also just copies the pairs -and storages for each collection and appends the collection name to the path or -URL. +Conflict resolution +------------------- It almost seems like it could work. But what if the same item is changed on both sides? What should vdirsyncer do? By default, it will show an ugly error @@ -125,3 +107,39 @@ Earlier we wrote that ``b = my_contacts_remote``, so when vdirsyncer encounters the situation where an item changed on both sides, it will simply overwrite the local item with the one from the server. Of course ``a wins`` is also a valid value. + +Collection discovery +-------------------- + +Configuring each collection (=addressbook/calendar) becomes extremely +repetitive if they are all on the same server. Vdirsyncer can do this for you +by automatically downloading a list of the configured user's collections:: + + [pair my_contacts] + a = my_contacts_local + b = my_contacts_remote + collections = from b + + [storage my_contacts_local] + type = filesystem + path = ~/.contacts/ + fileext = .vcf + + [storage my_contacts_remote] + type = carddav + url = https://owncloud.example.com/remote.php/carddav/addressbooks/bob/ + username = bob + password = asdf + +With the above configuration, vdirsyncer will fetch all available collections +from the server, and create subdirectories for each of them in +``~/.contacts/``. For example, ownCloud's default addressbook ``"default"`` +would be synchronized to the location ``~/.contacts/default/``. + +Vdirsyncer fetches this list on first sync, and will re-fetch it if you change +your configuration file. However, if new collections are created on the server, +it will not automatically start synchronizing those [1]_. You should run +``vdirsyncer discover`` to re-fetch this list instead. + +.. [1] Because collections are added rarely, and checking for this case before + every synchronization isn't worth the overhead.