mirror of
https://github.com/samsonjs/vdirsyncer.git
synced 2026-03-25 08:55:50 +00:00
parent
00ce809a34
commit
0e89753757
2 changed files with 352 additions and 120 deletions
|
|
@ -184,7 +184,7 @@ def test_deletion():
|
|||
assert items(a) == items(b) == {item2.raw}
|
||||
|
||||
|
||||
def test_insert_hash():
|
||||
def test_broken_status():
|
||||
a = MemoryStorage()
|
||||
b = MemoryStorage()
|
||||
status = {}
|
||||
|
|
@ -197,8 +197,8 @@ def test_insert_hash():
|
|||
del d['hash']
|
||||
|
||||
a.update(href, Item('UID:1\nHAHA:YES'), etag)
|
||||
sync(a, b, status)
|
||||
assert 'hash' in status['1'][0] and 'hash' in status['1'][1]
|
||||
with pytest.raises(SyncConflict):
|
||||
sync(a, b, status)
|
||||
|
||||
|
||||
def test_already_synced():
|
||||
|
|
@ -390,6 +390,7 @@ def test_partial_sync_revert():
|
|||
a.items[next(iter(a.items))] = ('foo', Item('UID:2\nupdated'))
|
||||
assert items(a) == {'UID:2\nupdated'}
|
||||
sync(a, b, status, partial_sync='revert')
|
||||
assert len(status) == 1
|
||||
assert items(a) == {'UID:2\nupdated'}
|
||||
sync(a, b, status, partial_sync='revert')
|
||||
assert items(a) == {'UID:2'}
|
||||
|
|
|
|||
|
|
@ -12,9 +12,11 @@ Yang: http://blog.ezyang.com/2012/08/how-offlineimap-works/
|
|||
Some modifications to it are explained in
|
||||
https://unterwaditzer.net/2016/sync-algorithm.html
|
||||
'''
|
||||
import abc
|
||||
import contextlib
|
||||
import itertools
|
||||
import logging
|
||||
import sqlite3
|
||||
|
||||
from . import exceptions
|
||||
from .utils import uniq
|
||||
|
|
@ -99,107 +101,345 @@ class _IdentAlreadyExists(SyncError):
|
|||
hrefs=[self.old_href, self.new_href])
|
||||
|
||||
|
||||
class _Status(object):
|
||||
def __init__(self, ident_to_props):
|
||||
self._ident_to_props = ident_to_props
|
||||
self._new_ident_to_props = {}
|
||||
|
||||
self._href_to_status_a = dict((meta['href'], (ident, meta))
|
||||
for ident, (meta, _)
|
||||
in self._ident_to_props.items())
|
||||
|
||||
self._href_to_status_b = dict((meta['href'], (ident, meta))
|
||||
for ident, (_, meta)
|
||||
in self._ident_to_props.items())
|
||||
class _StatusBase(metaclass=abc.ABCMeta):
|
||||
@abc.abstractmethod
|
||||
def transaction(self):
|
||||
raise NotImplementedError()
|
||||
|
||||
@abc.abstractmethod
|
||||
def insert_ident_a(self, ident, props):
|
||||
props_a, props_b = self._new_ident_to_props.get(ident, (None, None))
|
||||
if props_a is not None:
|
||||
raise _IdentAlreadyExists(old_href=props.href,
|
||||
new_href=props_a['href'])
|
||||
self._new_ident_to_props[ident] = props.to_status(), props_b
|
||||
raise NotImplementedError()
|
||||
|
||||
@abc.abstractmethod
|
||||
def insert_ident_b(self, ident, props):
|
||||
props_a, props_b = self._new_ident_to_props.get(ident, (None, None))
|
||||
if props_b is not None:
|
||||
raise _IdentAlreadyExists(old_href=props.href,
|
||||
new_href=props_b['href'])
|
||||
self._new_ident_to_props[ident] = props_a, props.to_status()
|
||||
raise NotImplementedError()
|
||||
|
||||
@abc.abstractmethod
|
||||
def update_ident_a(self, ident, props):
|
||||
raise NotImplementedError()
|
||||
|
||||
@abc.abstractmethod
|
||||
def update_ident_b(self, ident, props):
|
||||
raise NotImplementedError()
|
||||
|
||||
@abc.abstractmethod
|
||||
def remove_ident(self, ident):
|
||||
raise NotImplementedError()
|
||||
|
||||
@abc.abstractmethod
|
||||
def get_a(self, ident):
|
||||
raise NotImplementedError()
|
||||
|
||||
@abc.abstractmethod
|
||||
def get_b(self, ident):
|
||||
raise NotImplementedError()
|
||||
|
||||
@abc.abstractmethod
|
||||
def get_new_a(self, ident):
|
||||
raise NotImplementedError()
|
||||
|
||||
@abc.abstractmethod
|
||||
def get_new_b(self, ident):
|
||||
raise NotImplementedError()
|
||||
|
||||
@abc.abstractmethod
|
||||
def iter_old(self):
|
||||
raise NotImplementedError()
|
||||
|
||||
@abc.abstractmethod
|
||||
def iter_new(self):
|
||||
raise NotImplementedError()
|
||||
|
||||
@abc.abstractmethod
|
||||
def get_by_href_a(self, href, default=(None, None)):
|
||||
raise NotImplementedError()
|
||||
|
||||
@abc.abstractmethod
|
||||
def get_by_href_b(self, href, default=(None, None)):
|
||||
raise NotImplementedError()
|
||||
|
||||
@abc.abstractmethod
|
||||
def rollback(self, ident):
|
||||
raise NotImplementedError()
|
||||
|
||||
|
||||
class _SqliteStatus(_StatusBase):
|
||||
SCHEMA_VERSION = 1
|
||||
|
||||
def __init__(self, path):
|
||||
self._path = path
|
||||
self._c = sqlite3.connect(path)
|
||||
self._c.row_factory = sqlite3.Row
|
||||
self._update_schema()
|
||||
|
||||
def _update_schema(self):
|
||||
if self._is_latest_version():
|
||||
return
|
||||
|
||||
# If we ever bump the schema version, we will need a way to migrate
|
||||
# data.
|
||||
|
||||
self._c.execute('''CREATE TABLE meta (
|
||||
"version" INTEGER PRIMARY KEY
|
||||
); ''')
|
||||
self._c.execute('INSERT INTO meta (version) VALUES (?)',
|
||||
(self.SCHEMA_VERSION,))
|
||||
|
||||
# I know that this is a bad schema, but right there is just too little
|
||||
# gain in deduplicating the .._a and .._b columns.
|
||||
self._c.execute('''CREATE TABLE status (
|
||||
"ident" TEXT PRIMARY KEY NOT NULL,
|
||||
"href_a" TEXT,
|
||||
"href_b" TEXT,
|
||||
"hash_a" TEXT NOT NULL,
|
||||
"hash_b" TEXT NOT NULL,
|
||||
"etag_a" TEXT,
|
||||
"etag_b" TEXT
|
||||
); ''')
|
||||
self._c.execute('CREATE UNIQUE INDEX by_href_a ON status(href_a)')
|
||||
self._c.execute('CREATE UNIQUE INDEX by_href_b ON status(href_b)')
|
||||
|
||||
# We cannot add NOT NULL here because data is first fetched for the
|
||||
# storage a, then storage b. Inbetween the `_b`-columns are filled with
|
||||
# NULL.
|
||||
#
|
||||
# In an ideal world we would be able to start a transaction with one
|
||||
# cursor, write our new data into status and simultaneously query the
|
||||
# old status data using a different cursor. Unfortunately sqlite
|
||||
# enforces NOT NULL constraints immediately, not just at commit. Since
|
||||
# there is also no way to alter constraints on a table (disable
|
||||
# constraints on start of transaction and reenable on end), it's a
|
||||
# separate table now that just gets copied over before we commit.
|
||||
# That's a lot of copying, sadly.
|
||||
self._c.execute('''CREATE TABLE new_status (
|
||||
"ident" TEXT PRIMARY KEY NOT NULL,
|
||||
"href_a" TEXT,
|
||||
"href_b" TEXT,
|
||||
"hash_a" TEXT,
|
||||
"hash_b" TEXT,
|
||||
"etag_a" TEXT,
|
||||
"etag_b" TEXT
|
||||
); ''')
|
||||
|
||||
def _is_latest_version(self):
|
||||
try:
|
||||
return bool(self._c.execute(
|
||||
'SELECT version FROM meta WHERE version = ?',
|
||||
(self.SCHEMA_VERSION,)
|
||||
).fetchone())
|
||||
except sqlite3.OperationalError:
|
||||
return False
|
||||
|
||||
@contextlib.contextmanager
|
||||
def transaction(self):
|
||||
with self._c:
|
||||
try:
|
||||
yield
|
||||
self._c.execute('DELETE FROM status')
|
||||
self._c.execute('INSERT INTO status '
|
||||
'SELECT * FROM new_status')
|
||||
finally:
|
||||
self._c.execute('DELETE FROM new_status')
|
||||
|
||||
def insert_ident_a(self, ident, a_props):
|
||||
# FIXME: Super inefficient
|
||||
old_props = self.get_new_a(ident)
|
||||
if old_props is not None:
|
||||
raise _IdentAlreadyExists(old_href=old_props.href,
|
||||
new_href=a_props.href)
|
||||
b_props = self.get_new_b(ident) or _ItemMetadata()
|
||||
self._c.execute(
|
||||
'INSERT OR REPLACE INTO new_status '
|
||||
'VALUES(?, ?, ?, ?, ?, ?, ?)',
|
||||
(ident, a_props.href, b_props.href, a_props.hash, b_props.hash,
|
||||
a_props.etag, b_props.etag)
|
||||
)
|
||||
|
||||
def insert_ident_b(self, ident, b_props):
|
||||
# FIXME: Super inefficient
|
||||
old_props = self.get_new_b(ident)
|
||||
if old_props is not None:
|
||||
raise _IdentAlreadyExists(old_href=old_props.href,
|
||||
new_href=b_props.href)
|
||||
a_props = self.get_new_a(ident) or _ItemMetadata()
|
||||
self._c.execute(
|
||||
'INSERT OR REPLACE INTO new_status '
|
||||
'VALUES(?, ?, ?, ?, ?, ?, ?)',
|
||||
(ident, a_props.href, b_props.href, a_props.hash, b_props.hash,
|
||||
a_props.etag, b_props.etag)
|
||||
)
|
||||
|
||||
def update_ident_a(self, ident, props):
|
||||
self._new_ident_to_props[ident] = (
|
||||
props.to_status(),
|
||||
self._new_ident_to_props[ident][1],
|
||||
c = self._c.cursor()
|
||||
c.execute(
|
||||
'UPDATE new_status'
|
||||
' SET href_a=?, hash_a=?, etag_a=?'
|
||||
' WHERE ident=?',
|
||||
(props.href, props.hash, props.etag, ident)
|
||||
)
|
||||
self._c.commit()
|
||||
assert c.rowcount > 0
|
||||
|
||||
def update_ident_b(self, ident, props):
|
||||
self._new_ident_to_props[ident] = (
|
||||
self._new_ident_to_props[ident][0],
|
||||
props.to_status(),
|
||||
c = self._c.cursor()
|
||||
c.execute(
|
||||
'UPDATE new_status'
|
||||
' SET href_b=?, hash_b=?, etag_b=?'
|
||||
' WHERE ident=?',
|
||||
(props.href, props.hash, props.etag, ident)
|
||||
)
|
||||
self._c.commit()
|
||||
assert c.rowcount > 0
|
||||
|
||||
def remove_ident(self, ident):
|
||||
del self._new_ident_to_props[ident]
|
||||
self._c.execute('DELETE FROM new_status WHERE ident=?', (ident,))
|
||||
|
||||
def _get_impl(self, ident, side, table):
|
||||
res = self._c.execute('SELECT href_{side} AS href,'
|
||||
' hash_{side} AS hash,'
|
||||
' etag_{side} AS etag '
|
||||
'FROM {table} WHERE ident=?'
|
||||
.format(side=side, table=table),
|
||||
(ident,)).fetchone()
|
||||
if res is None:
|
||||
return None
|
||||
|
||||
if res['hash'] is None: # FIXME: Implement as constraint in db
|
||||
assert res['href'] is None
|
||||
assert res['etag'] is None
|
||||
return None
|
||||
|
||||
res = dict(res)
|
||||
return _ItemMetadata(**res)
|
||||
|
||||
def get_a(self, ident):
|
||||
rv = self._ident_to_props[ident][0]
|
||||
if rv is None:
|
||||
raise KeyError()
|
||||
return _ItemMetadata(**rv)
|
||||
return self._get_impl(ident, side='a', table='status')
|
||||
|
||||
def get_b(self, ident):
|
||||
rv = self._ident_to_props[ident][1]
|
||||
if rv is None:
|
||||
raise KeyError()
|
||||
return _ItemMetadata(**rv)
|
||||
return self._get_impl(ident, side='b', table='status')
|
||||
|
||||
def get_new_a(self, ident):
|
||||
rv = self._new_ident_to_props[ident][0]
|
||||
if rv is None:
|
||||
raise KeyError()
|
||||
return _ItemMetadata(**rv)
|
||||
return self._get_impl(ident, side='a', table='new_status')
|
||||
|
||||
def get_new_b(self, ident):
|
||||
rv = self._new_ident_to_props[ident][1]
|
||||
if rv is None:
|
||||
raise KeyError()
|
||||
return _ItemMetadata(**rv)
|
||||
return self._get_impl(ident, side='b', table='new_status')
|
||||
|
||||
def iter_old(self):
|
||||
return iter(self._ident_to_props)
|
||||
return iter(res['ident'] for res in
|
||||
self._c.execute('SELECT ident FROM status').fetchall())
|
||||
|
||||
def iter_new(self):
|
||||
return iter(self._new_ident_to_props)
|
||||
return iter(res['ident'] for res in
|
||||
self._c.execute('SELECT ident FROM new_status').fetchall())
|
||||
|
||||
def rollback(self, ident):
|
||||
if ident in self._ident_to_props:
|
||||
self._new_ident_to_props[ident] = self._ident_to_props[ident]
|
||||
else:
|
||||
self._new_ident_to_props.pop(ident, None)
|
||||
a = self.get_a(ident)
|
||||
b = self.get_b(ident)
|
||||
assert (a is None) == (b is None)
|
||||
|
||||
if a is None and b is None:
|
||||
self.remove_ident(ident)
|
||||
return
|
||||
|
||||
self._c.execute(
|
||||
'INSERT OR REPLACE INTO new_status'
|
||||
' VALUES (?, ?, ?, ?, ?, ?, ?)',
|
||||
(ident, a.href, b.href, a.hash, b.hash, a.etag, b.etag)
|
||||
)
|
||||
|
||||
def _get_by_href_impl(self, href, default=(None, None), side=None):
|
||||
res = self._c.execute(
|
||||
'SELECT ident, hash_{side} AS hash, etag_{side} AS etag '
|
||||
'FROM status WHERE href_{side}=?'.format(side=side),
|
||||
(href,)).fetchone()
|
||||
if not res:
|
||||
return default
|
||||
return res['ident'], _ItemMetadata(
|
||||
href=href,
|
||||
hash=res['hash'],
|
||||
etag=res['etag'],
|
||||
)
|
||||
|
||||
def get_by_href_a(self, *a, **kw):
|
||||
kw['side'] = 'a'
|
||||
return self._get_by_href_impl(*a, **kw)
|
||||
|
||||
def get_by_href_b(self, *a, **kw):
|
||||
kw['side'] = 'b'
|
||||
return self._get_by_href_impl(*a, **kw)
|
||||
|
||||
|
||||
class _Status(_StatusBase):
|
||||
|
||||
def __init__(self, ident_to_props):
|
||||
self._db = _SqliteStatus(':memory:')
|
||||
self._ident_to_props = ident_to_props
|
||||
|
||||
def insert_ident_a(self, ident, props):
|
||||
return self._db.insert_ident_a(ident, props)
|
||||
|
||||
def insert_ident_b(self, ident, props):
|
||||
return self._db.insert_ident_b(ident, props)
|
||||
|
||||
def update_ident_a(self, ident, props):
|
||||
return self._db.update_ident_a(ident, props)
|
||||
|
||||
def update_ident_b(self, ident, props):
|
||||
return self._db.update_ident_b(ident, props)
|
||||
|
||||
def remove_ident(self, ident):
|
||||
return self._db.remove_ident(ident)
|
||||
|
||||
def get_a(self, ident):
|
||||
return self._db.get_a(ident)
|
||||
|
||||
def get_b(self, ident):
|
||||
return self._db.get_b(ident)
|
||||
|
||||
def get_new_a(self, ident):
|
||||
return self._db.get_new_a(ident)
|
||||
|
||||
def get_new_b(self, ident):
|
||||
return self._db.get_new_b(ident)
|
||||
|
||||
def iter_old(self):
|
||||
return self._db.iter_old()
|
||||
|
||||
def iter_new(self):
|
||||
return self._db.iter_new()
|
||||
|
||||
def get_by_href_a(self, href, default=(None, None)):
|
||||
try:
|
||||
ident, meta = self._href_to_status_a[href]
|
||||
except KeyError:
|
||||
return default
|
||||
else:
|
||||
return ident, _ItemMetadata(**meta)
|
||||
return self._db.get_by_href_a(href, default)
|
||||
|
||||
def get_by_href_b(self, href, default=(None, None)):
|
||||
try:
|
||||
ident, meta = self._href_to_status_b[href]
|
||||
except KeyError:
|
||||
return default
|
||||
else:
|
||||
return ident, _ItemMetadata(**meta)
|
||||
return self._db.get_by_href_b(href, default)
|
||||
|
||||
def new_to_old_status(self):
|
||||
for meta_a, meta_b in self._new_ident_to_props.values():
|
||||
assert meta_a is not None
|
||||
assert meta_b is not None
|
||||
def rollback(self, ident):
|
||||
return self._db.rollback(ident)
|
||||
|
||||
@contextlib.contextmanager
|
||||
def transaction(self):
|
||||
with self._db.transaction():
|
||||
for ident, (a, b) in self._ident_to_props.items():
|
||||
if a.get('hash') is None or b.get('hash') is None:
|
||||
continue
|
||||
params = (ident, a.get('href'), a.get('hash'), a.get('etag'),
|
||||
b.get('href'), b.get('hash'), b.get('etag'))
|
||||
|
||||
self._db._c.execute(
|
||||
'INSERT INTO status'
|
||||
' (ident, href_a, hash_a, etag_a,'
|
||||
' href_b, hash_b, etag_b)'
|
||||
' VALUES (?, ?, ?, ?, ?, ?, ?)',
|
||||
params
|
||||
)
|
||||
yield
|
||||
|
||||
self._ident_to_props.clear()
|
||||
self._ident_to_props.update(self._new_ident_to_props)
|
||||
for ident in self._db.iter_old():
|
||||
a = self._db.get_a(ident)
|
||||
b = self._db.get_b(ident)
|
||||
self._ident_to_props[ident] = a.to_status(), b.to_status()
|
||||
|
||||
|
||||
class _SubStatus(object):
|
||||
|
|
@ -264,16 +504,12 @@ class _StorageInfo(object):
|
|||
|
||||
for href, etag in self.storage.list():
|
||||
ident, meta = self.status.get_by_href(href)
|
||||
if meta is None:
|
||||
meta = _ItemMetadata()
|
||||
|
||||
if meta.href != href or meta.etag != etag:
|
||||
if meta is None or meta.href != href or meta.etag != etag:
|
||||
# Either the item is completely new, or updated
|
||||
# In both cases we should prefetch
|
||||
prefetch.append(href)
|
||||
else:
|
||||
meta.href = href
|
||||
meta.etag = etag
|
||||
# Metadata is completely identical
|
||||
_store_props(ident, meta)
|
||||
|
||||
# Prefetch items
|
||||
|
|
@ -289,24 +525,21 @@ class _StorageInfo(object):
|
|||
return storage_nonempty
|
||||
|
||||
def is_changed(self, ident):
|
||||
try:
|
||||
status = self.status.get(ident)
|
||||
except KeyError: # new item
|
||||
old_meta = self.status.get(ident)
|
||||
if old_meta is None: # new item
|
||||
return True
|
||||
|
||||
meta = self.status.get_new(ident)
|
||||
new_meta = self.status.get_new(ident)
|
||||
|
||||
if meta.etag != status.etag: # etag changed
|
||||
old_hash = status.hash
|
||||
if old_hash is None or meta.hash != old_hash:
|
||||
# item actually changed
|
||||
return True
|
||||
else:
|
||||
# only etag changed
|
||||
return False
|
||||
return (
|
||||
new_meta.etag != old_meta.etag and # etag changed
|
||||
# item actually changed
|
||||
(old_meta.hash is None or new_meta.hash != old_meta.hash)
|
||||
)
|
||||
|
||||
def set_item_cache(self, ident, item):
|
||||
assert self.status.get_new(ident).hash == item.hash
|
||||
actual_hash = self.status.get_new(ident).hash
|
||||
assert actual_hash == item.hash
|
||||
self._item_cache[ident] = item
|
||||
|
||||
def get_item_cache(self, ident):
|
||||
|
|
@ -370,31 +603,35 @@ def sync(storage_a, storage_b, status, conflict_resolution=None,
|
|||
status_nonempty = bool(status)
|
||||
status = _Status(status)
|
||||
|
||||
a_info = _StorageInfo(storage_a, _SubStatus(status, 'a'))
|
||||
b_info = _StorageInfo(storage_b, _SubStatus(status, 'b'))
|
||||
with status.transaction():
|
||||
a_info = _StorageInfo(storage_a, _SubStatus(status, 'a'))
|
||||
b_info = _StorageInfo(storage_b, _SubStatus(status, 'b'))
|
||||
|
||||
a_nonempty = a_info.prepare_new_status()
|
||||
b_nonempty = b_info.prepare_new_status()
|
||||
a_nonempty = a_info.prepare_new_status()
|
||||
b_nonempty = b_info.prepare_new_status()
|
||||
|
||||
if status_nonempty and not force_delete:
|
||||
if a_nonempty and not b_nonempty:
|
||||
raise StorageEmpty(empty_storage=storage_b)
|
||||
elif not a_nonempty and b_nonempty:
|
||||
raise StorageEmpty(empty_storage=storage_a)
|
||||
if status_nonempty and not force_delete:
|
||||
if a_nonempty and not b_nonempty:
|
||||
raise StorageEmpty(empty_storage=storage_b)
|
||||
elif not a_nonempty and b_nonempty:
|
||||
raise StorageEmpty(empty_storage=storage_a)
|
||||
|
||||
actions = list(_get_actions(a_info, b_info))
|
||||
actions = list(_get_actions(a_info, b_info))
|
||||
|
||||
with storage_a.at_once(), storage_b.at_once():
|
||||
for action in actions:
|
||||
try:
|
||||
action.run(a_info, b_info, conflict_resolution, partial_sync)
|
||||
except Exception as e:
|
||||
if error_callback:
|
||||
error_callback(e)
|
||||
else:
|
||||
raise
|
||||
|
||||
status.new_to_old_status()
|
||||
with storage_a.at_once(), storage_b.at_once():
|
||||
for action in actions:
|
||||
try:
|
||||
action.run(
|
||||
a_info,
|
||||
b_info,
|
||||
conflict_resolution,
|
||||
partial_sync
|
||||
)
|
||||
except Exception as e:
|
||||
if error_callback:
|
||||
error_callback(e)
|
||||
else:
|
||||
raise
|
||||
|
||||
|
||||
class Action:
|
||||
|
|
@ -440,6 +677,7 @@ class Upload(Action):
|
|||
sync_logger.info(u'Copying (uploading) item {} to {}'
|
||||
.format(self.ident, self.dest.storage))
|
||||
href, etag = self.dest.storage.upload(self.item)
|
||||
assert href is not None
|
||||
|
||||
self.dest.status.insert_ident(self.ident, _ItemMetadata(
|
||||
href=href,
|
||||
|
|
@ -518,15 +756,8 @@ class ResolveConflict(Action):
|
|||
def _get_actions(a_info, b_info):
|
||||
for ident in uniq(itertools.chain(a_info.status.parent.iter_new(),
|
||||
a_info.status.parent.iter_old())):
|
||||
try:
|
||||
a = a_info.status.get_new(ident)
|
||||
except KeyError:
|
||||
a = None
|
||||
|
||||
try:
|
||||
b = b_info.status.get_new(ident)
|
||||
except KeyError:
|
||||
b = None
|
||||
a = a_info.status.get_new(ident)
|
||||
b = b_info.status.get_new(ident)
|
||||
|
||||
if a and b:
|
||||
a_changed = a_info.is_changed(ident)
|
||||
|
|
|
|||
Loading…
Reference in a new issue