From 3f0415cc092416f8811da0c18a198513d0ef46d3 Mon Sep 17 00:00:00 2001
From: Markus Unterwaditzer <markus@unterwaditzer.net>
Date: Tue, 17 Feb 2015 23:10:24 +0100
Subject: [PATCH] Add basic repair tool

Fix #172
---
 CHANGELOG.rst              |  2 ++
 vdirsyncer/cli/__init__.py | 20 ++++++++++-
 vdirsyncer/cli/tasks.py    | 68 +++++++++++++++++++++++++++++++++++++-
 vdirsyncer/cli/utils.py    | 25 +++++++++-----
 4 files changed, 104 insertions(+), 11 deletions(-)

diff --git a/CHANGELOG.rst b/CHANGELOG.rst
index c2ca153..e267f0c 100644
--- a/CHANGELOG.rst
+++ b/CHANGELOG.rst
@@ -21,6 +21,8 @@ Version 0.4.3
 - Removed some workarounds for Radicale. Upgrading to the latest Radicale will
   fix the issues.
 - Fixed issues with iCloud discovery.
+- Vdirsyncer now includes a simple ``repair`` command that seeks to fix some
+  broken items.
 
 Version 0.4.2
 =============
diff --git a/vdirsyncer/cli/__init__.py b/vdirsyncer/cli/__init__.py
index 68d913c..3b5e12a 100644
--- a/vdirsyncer/cli/__init__.py
+++ b/vdirsyncer/cli/__init__.py
@@ -3,7 +3,7 @@
 import functools
 import sys
 
-from .tasks import discover_collections, sync_pair
+from .tasks import discover_collections, repair_collection, sync_pair
 from .utils import CliError, WorkerQueue, cli_logger, handle_cli_error, \
     load_config, parse_pairs_args
 from .. import __version__, log
@@ -127,3 +127,21 @@ def discover(ctx, pairs, max_workers):
         ))
 
     wq.join()
+
+
+@app.command()
+@click.argument('collection')
+@click.pass_context
+@catch_errors
+def repair(ctx, collection):
+    '''
+    Repair a given collection: `storage/collection/storage`
+
+    `vdirsyncer repair calendars_local/foo` repairs the `foo` collection of
+    the `calendars_local` storage.
+
+    It will download all items and repair their properties if necessary.
+    Currently this only fixes absent or duplicate UIDs.
+    '''
+    general, all_pairs, all_storages = ctx.obj['config']
+    repair_collection(general, all_pairs, all_storages, collection)
diff --git a/vdirsyncer/cli/tasks.py b/vdirsyncer/cli/tasks.py
index ebfefab..8ac7189 100644
--- a/vdirsyncer/cli/tasks.py
+++ b/vdirsyncer/cli/tasks.py
@@ -2,12 +2,14 @@
 
 import functools
 import json
+import uuid
 
 from .utils import CliError, JobFailed, cli_logger, collections_for_pair, \
     get_status_name, handle_cli_error, load_status, save_status, \
-    storage_instance_from_config
+    storage_class_from_config, storage_instance_from_config
 
 from ..sync import sync
+from ..utils.vobject import Item, to_unicode_lines
 
 
 def sync_pair(wq, pair_name, collections_to_sync, general, all_pairs,
@@ -84,3 +86,67 @@ def discover_collections(wq, pair_name, **kwargs):
         collections = None
     cli_logger.info('Saved for {}: collections = {}'
                     .format(pair_name, json.dumps(collections)))
+
+
+def repair_collection(general, all_pairs, all_storages, collection):
+    storage_name, collection = collection, None
+    if '/' in storage_name:
+        storage_name, collection = storage_name.split('/')
+
+    config = all_storages[storage_name]
+    storage_type = config['type']
+
+    if collection is not None:
+        cli_logger.info('Discovering collections (skipping cache).')
+        cls, config = storage_class_from_config(config)
+        for config in cls.discover(**config):
+            if config['collection'] == collection:
+                break
+        else:
+            raise CliError('Couldn\'t find collection {} for storage {}.'
+                           .format(collection, storage_name))
+
+    config['type'] = storage_type
+    storage = storage_instance_from_config(config)
+
+    cli_logger.info('Repairing {}/{}'.format(storage_name, collection))
+    cli_logger.warning('Make sure no other program is talking to the server.')
+    _repair_collection(storage)
+
+
+def _repair_collection(storage):
+    seen_uids = set()
+    all_hrefs = list(storage.list())
+    for i, (href, _) in enumerate(all_hrefs):
+        item, etag = storage.get(href)
+        cli_logger.info('[{}/{}] Processing {}'
+                        .format(i, len(all_hrefs), href))
+
+        parsed = item.parsed
+        changed = False
+        if parsed is None:
+            cli_logger.warning('Item {} can\'t be parsed, skipping.'
+                               .format(href))
+            continue
+
+        if item.uid is None or item.uid in seen_uids:
+            if item.uid is None:
+                cli_logger.warning('No UID, assigning random one.')
+            else:
+                cli_logger.warning('Duplicate UID, reassigning random one.')
+
+            new_uid = uuid.uuid4()
+            stack = [parsed]
+            while stack:
+                component = stack.pop()
+                if component.name in ('VEVENT', 'VTODO', 'VJOURNAL', 'VCARD'):
+                    component['UID'] = new_uid
+                    changed = True
+                else:
+                    stack.extend(component.subcomponents)
+
+        new_item = Item(u'\n'.join(to_unicode_lines(parsed)))
+        assert new_item.uid
+        seen_uids.add(new_item.uid)
+        if changed:
+            storage.update(href, new_item, etag)
diff --git a/vdirsyncer/cli/utils.py b/vdirsyncer/cli/utils.py
index 9867cc8..eed430d 100644
--- a/vdirsyncer/cli/utils.py
+++ b/vdirsyncer/cli/utils.py
@@ -45,7 +45,7 @@ class JobFailed(RuntimeError):
     pass
 
 
-def handle_cli_error(status_name='sync'):
+def handle_cli_error(status_name=None):
     '''
     Print a useful error message for the current exception.
 
@@ -80,20 +80,27 @@
         )
     except IdentConflict as e:
         cli_logger.error(
-            '{status_name}: Storage "{name}" contains multiple items with the '
-            'same UID or even content. Vdirsyncer will now abort the '
-            'synchronization of this collection, because the fix for this is '
-            'not clear; It could be the result of a badly behaving server.\n'
-            '\n{href_list}\n'
+            '{status_name}: Storage "{storage.instance_name}" contains '
+            'multiple items with the same UID or even content. Vdirsyncer '
+            'will now abort the synchronization of this collection, because '
+            'the fix for this is not clear; It could be the result of a badly '
+            'behaving server. You can try running:\n\n'
+            '    vdirsyncer repair {storage.instance_name}\n\n'
+            'But make sure to have a backup of your data in some form. The '
+            'offending hrefs are:\n\n{href_list}\n'
            .format(status_name=status_name,
-                    name=e.storage.instance_name,
+                    storage=e.storage,
                     href_list='\n'.join(map(repr, e.hrefs)))
         )
     except (click.Abort, KeyboardInterrupt, JobFailed):
         pass
     except Exception as e:
-        cli_logger.exception('Unhandled exception occured while syncing {}.'
-                             .format(status_name))
+        if status_name:
+            msg = 'Unhandled exception occured for {}.'.format(status_name)
+        else:
+            msg = 'Unhandled exception occured.'
+
+        cli_logger.exception(msg)
 
 
 def validate_section_name(name, section_type):