From 5a508ae327404cc539d70dbbd3d742613e375f5f Mon Sep 17 00:00:00 2001 From: Markus Unterwaditzer Date: Wed, 22 Mar 2017 15:08:43 +0100 Subject: [PATCH] Abstract status into own class (#607) * Refactor status management in separate class * Rename to SubStatus, remove unused field * Move item cache out of status * stylefix --- vdirsyncer/sync.py | 282 ++++++++++++++++++++++++++++++++------------- 1 file changed, 202 insertions(+), 80 deletions(-) diff --git a/vdirsyncer/sync.py b/vdirsyncer/sync.py index b2bcc30..7c0e249 100644 --- a/vdirsyncer/sync.py +++ b/vdirsyncer/sync.py @@ -88,9 +88,143 @@ class PartialSync(SyncError): storage = None +class _IdentAlreadyExists(SyncError): + '''Like IdentConflict, but for internal state. If this bubbles up, we don't + have a data race, but a bug.''' + old_href = None + new_href = None + + def to_ident_conflict(self, storage): + return IdentConflict(storage=storage, + hrefs=[self.old_href, self.new_href]) + + +class _Status(object): + def __init__(self, ident_to_props): + self._ident_to_props = ident_to_props + self._new_ident_to_props = {} + + self._href_to_status_a = dict((meta['href'], (ident, meta)) + for ident, (meta, _) + in self._ident_to_props.items()) + + self._href_to_status_b = dict((meta['href'], (ident, meta)) + for ident, (_, meta) + in self._ident_to_props.items()) + + def insert_ident_a(self, ident, props): + props_a, props_b = self._new_ident_to_props.get(ident, (None, None)) + if props_a is not None: + raise _IdentAlreadyExists(old_href=props.href, + new_href=props_a['href']) + self._new_ident_to_props[ident] = props.to_status(), props_b + + def insert_ident_b(self, ident, props): + props_a, props_b = self._new_ident_to_props.get(ident, (None, None)) + if props_b is not None: + raise _IdentAlreadyExists(old_href=props.href, + new_href=props_b['href']) + self._new_ident_to_props[ident] = props_a, props.to_status() + + def update_ident_a(self, ident, props): + self._new_ident_to_props[ident] = ( + props.to_status(), + self._new_ident_to_props[ident][1], + ) + + def update_ident_b(self, ident, props): + self._new_ident_to_props[ident] = ( + self._new_ident_to_props[ident][0], + props.to_status(), + ) + + def remove_ident(self, ident): + del self._new_ident_to_props[ident] + + def get_a(self, ident): + rv = self._ident_to_props[ident][0] + if rv is None: + raise KeyError() + return _ItemMetadata(**rv) + + def get_b(self, ident): + rv = self._ident_to_props[ident][1] + if rv is None: + raise KeyError() + return _ItemMetadata(**rv) + + def get_new_a(self, ident): + rv = self._new_ident_to_props[ident][0] + if rv is None: + raise KeyError() + return _ItemMetadata(**rv) + + def get_new_b(self, ident): + rv = self._new_ident_to_props[ident][1] + if rv is None: + raise KeyError() + return _ItemMetadata(**rv) + + def iter_old(self): + return iter(self._ident_to_props) + + def iter_new(self): + return iter(self._new_ident_to_props) + + def rollback(self, ident): + if ident in self._ident_to_props: + self._new_ident_to_props[ident] = self._ident_to_props[ident] + else: + self._new_ident_to_props.pop(ident, None) + + def get_by_href_a(self, href, default=(None, None)): + try: + ident, meta = self._href_to_status_a[href] + except KeyError: + return default + else: + return ident, _ItemMetadata(**meta) + + def get_by_href_b(self, href, default=(None, None)): + try: + ident, meta = self._href_to_status_b[href] + except KeyError: + return default + else: + return ident, _ItemMetadata(**meta) + + def new_to_old_status(self): + for meta_a, meta_b in self._new_ident_to_props.values(): + assert meta_a is not None + assert meta_b is not None + + self._ident_to_props.clear() + self._ident_to_props.update(self._new_ident_to_props) + + +class _SubStatus(object): + def __init__(self, parent, side): + self.parent = parent + assert side in 'ab' + + self.remove_ident = parent.remove_ident + + if side == 'a': + self.insert_ident = parent.insert_ident_a + self.update_ident = parent.update_ident_a + self.get = parent.get_a + self.get_new = parent.get_new_a + self.get_by_href = parent.get_by_href_a + else: + self.insert_ident = parent.insert_ident_b + self.update_ident = parent.update_ident_b + self.get = parent.get_b + self.get_new = parent.get_new_b + self.get_by_href = parent.get_by_href_b + + class _ItemMetadata: href = None - _item = None hash = None etag = None @@ -99,15 +233,6 @@ class _ItemMetadata: assert hasattr(self, k) setattr(self, k, v) - @property - def item(self): - return self._item - - @item.setter - def item(self, item): - self._item = item - self.hash = item.hash - def to_status(self): return { 'href': self.href, @@ -120,35 +245,25 @@ class _StorageInfo(object): '''A wrapper class that holds prefetched items, the status and other things.''' def __init__(self, storage, status): - ''' - :param status: {ident: {'href': href, 'etag': etag}} - ''' self.storage = storage - - #: Represents the status as given. Must not be modified. self.status = status - - #: Represents the current state of the storage and is modified as items - #: are uploaded and downloaded. Will be dumped into status. - self.new_status = None + self._item_cache = {} def prepare_new_status(self): - href_to_status = dict((meta.href, (ident, meta)) - for ident, meta - in self.status.items()) - + storage_nonempty = False prefetch = [] - self.new_status = {} def _store_props(ident, props): - new_props = self.new_status.setdefault(ident, props) - if new_props is not props: - raise IdentConflict(storage=self.storage, - hrefs=[new_props.href, - props.href]) + nonlocal storage_nonempty + storage_nonempty = True + + try: + self.status.insert_ident(ident, props) + except _IdentAlreadyExists as e: + raise e.to_ident_conflict(self.storage) for href, etag in self.storage.list(): - ident, meta = href_to_status.get(href, (None, None)) + ident, meta = self.status.get_by_href(href) if meta is None: meta = _ItemMetadata() @@ -166,17 +281,21 @@ class _StorageInfo(object): if prefetch else ()): _store_props(item.ident, _ItemMetadata( href=href, - etag=etag, - item=item + hash=item.hash, + etag=etag )) + self.set_item_cache(item.ident, item) + + return storage_nonempty def is_changed(self, ident): - status = self.status.get(ident, None) - meta = self.new_status[ident] - - if status is None: # new item + try: + status = self.status.get(ident) + except KeyError: # new item return True + meta = self.status.get_new(ident) + if meta.etag != status.etag: # etag changed old_hash = status.hash if old_hash is None or meta.hash != old_hash: @@ -186,6 +305,13 @@ class _StorageInfo(object): # only etag changed return False + def set_item_cache(self, ident, item): + assert self.status.get_new(ident).hash == item.hash + self._item_cache[ident] = item + + def get_item_cache(self, ident): + return self._item_cache[ident] + def _migrate_status(status): for ident in list(status): @@ -241,23 +367,19 @@ def sync(storage_a, storage_b, status, conflict_resolution=None, conflict_resolution = lambda a, b: b _migrate_status(status) + status_nonempty = bool(status) + status = _Status(status) - a_status = {} - b_status = {} - for ident, (meta_a, meta_b) in status.items(): - a_status[ident] = _ItemMetadata(**meta_a) - b_status[ident] = _ItemMetadata(**meta_b) + a_info = _StorageInfo(storage_a, _SubStatus(status, 'a')) + b_info = _StorageInfo(storage_b, _SubStatus(status, 'b')) - a_info = _StorageInfo(storage_a, a_status) - b_info = _StorageInfo(storage_b, b_status) + a_nonempty = a_info.prepare_new_status() + b_nonempty = b_info.prepare_new_status() - a_info.prepare_new_status() - b_info.prepare_new_status() - - if status and not force_delete: - if a_info.new_status and not b_info.new_status: + if status_nonempty and not force_delete: + if a_nonempty and not b_nonempty: raise StorageEmpty(empty_storage=storage_b) - elif b_info.new_status and not a_info.new_status: + elif not a_nonempty and b_nonempty: raise StorageEmpty(empty_storage=storage_a) actions = list(_get_actions(a_info, b_info)) @@ -272,13 +394,7 @@ def sync(storage_a, storage_b, status, conflict_resolution=None, else: raise - status.clear() - for ident in uniq(itertools.chain(a_info.new_status, - b_info.new_status)): - status[ident] = ( - a_info.new_status[ident].to_status(), - b_info.new_status[ident].to_status() - ) + status.new_to_old_status() class Action: @@ -307,11 +423,7 @@ class Action: raise e def rollback(self, a, b): - for info in (a, b): - if self.ident in info.status: - info.new_status[self.ident] = info.status[self.ident] - else: - info.new_status.pop(self.ident, None) + a.status.parent.rollback(self.ident) class Upload(Action): @@ -329,12 +441,11 @@ class Upload(Action): .format(self.ident, self.dest.storage)) href, etag = self.dest.storage.upload(self.item) - assert self.ident not in self.dest.new_status - self.dest.new_status[self.ident] = _ItemMetadata( + self.dest.status.insert_ident(self.ident, _ItemMetadata( href=href, hash=self.item.hash, etag=etag - ) + )) class Update(Action): @@ -345,15 +456,15 @@ class Update(Action): def _run_impl(self, a, b): if self.dest.storage.read_only: - meta = _ItemMetadata(item=self.item) + meta = _ItemMetadata(hash=self.item.hash) else: sync_logger.info(u'Copying (updating) item {} to {}' .format(self.ident, self.dest.storage)) - meta = self.dest.new_status[self.ident] + meta = self.dest.status.get_new(self.ident) meta.etag = \ self.dest.storage.update(meta.href, self.item, meta.etag) - self.dest.new_status[self.ident] = meta + self.dest.status.update_ident(self.ident, meta) class Delete(Action): @@ -362,12 +473,13 @@ class Delete(Action): self.dest = dest def _run_impl(self, a, b): - meta = self.dest.new_status[self.ident] + meta = self.dest.status.get_new(self.ident) if not self.dest.storage.read_only: sync_logger.info(u'Deleting item {} from {}' .format(self.ident, self.dest.storage)) self.dest.storage.delete(meta.href, meta.etag) - del self.dest.new_status[self.ident] + + self.dest.status.remove_ident(self.ident) class ResolveConflict(Action): @@ -378,8 +490,9 @@ class ResolveConflict(Action): with self.auto_rollback(a, b): sync_logger.info(u'Doing conflict resolution for item {}...' .format(self.ident)) - meta_a = a.new_status[self.ident] - meta_b = b.new_status[self.ident] + + meta_a = a.status.get_new(self.ident) + meta_b = b.status.get_new(self.ident) if meta_a.hash == meta_b.hash: sync_logger.info(u'...same content on both sides.') @@ -387,7 +500,9 @@ class ResolveConflict(Action): raise SyncConflict(ident=self.ident, href_a=meta_a.href, href_b=meta_b.href) elif callable(conflict_resolution): - new_item = conflict_resolution(meta_a.item, meta_b.item) + item_a = a.get_item_cache(self.ident) + item_b = b.get_item_cache(self.ident) + new_item = conflict_resolution(item_a, item_b) if new_item.hash != meta_a.hash: Update(new_item, a).run(a, b, conflict_resolution, partial_sync) @@ -401,10 +516,17 @@ class ResolveConflict(Action): def _get_actions(a_info, b_info): - for ident in uniq(itertools.chain(a_info.new_status, b_info.new_status, - a_info.status)): - a = a_info.new_status.get(ident, None) # item exists in a - b = b_info.new_status.get(ident, None) # item exists in b + for ident in uniq(itertools.chain(a_info.status.parent.iter_new(), + a_info.status.parent.iter_old())): + try: + a = a_info.status.get_new(ident) + except KeyError: + a = None + + try: + b = b_info.status.get_new(ident) + except KeyError: + b = None if a and b: a_changed = a_info.is_changed(ident) @@ -415,15 +537,15 @@ def _get_actions(a_info, b_info): yield ResolveConflict(ident) elif a_changed and not b_changed: # item was only modified in a - yield Update(a.item, b_info) + yield Update(a_info.get_item_cache(ident), b_info) elif not a_changed and b_changed: # item was only modified in b - yield Update(b.item, a_info) + yield Update(b_info.get_item_cache(ident), a_info) elif a and not b: if a_info.is_changed(ident): # was deleted from b but modified on a # OR: new item was created in a - yield Upload(a.item, b_info) + yield Upload(a_info.get_item_cache(ident), b_info) else: # was deleted from b and not modified on a yield Delete(ident, a_info) @@ -431,7 +553,7 @@ def _get_actions(a_info, b_info): if b_info.is_changed(ident): # was deleted from a but modified on b # OR: new item was created in b - yield Upload(b.item, a_info) + yield Upload(b_info.get_item_cache(ident), a_info) else: # was deleted from a and not changed on b yield Delete(ident, b_info)