Merge pull request #227 from untitaker/metadata

Metadata
This commit is contained in:
Markus Unterwaditzer 2015-07-07 16:36:07 +02:00
commit c84cfc0a29
16 changed files with 389 additions and 22 deletions

View file

@ -1,5 +1,8 @@
# An example configuration for vdirsyncer. # An example configuration for vdirsyncer.
#
# Move it to ~/.vdirsyncer/config or ~/.config/vdirsyncer/config and edit it.
# Run `vdirsyncer --help` for CLI usage.
#
# Optional parameters are commented out. # Optional parameters are commented out.
# This file doesn't document all available parameters, see # This file doesn't document all available parameters, see
# http://vdirsyncer.readthedocs.org/ for the rest of them. # http://vdirsyncer.readthedocs.org/ for the rest of them.
@ -27,6 +30,9 @@ b = bob_contacts_remote
collections = ["from b"] collections = ["from b"]
# Synchronize the "display name" property into a local file (~/.contacts/displayname).
metadata = ["displayname"]
# To resolve a conflict the following values are possible: # To resolve a conflict the following values are possible:
# `null` - abort when collisions occur (default) # `null` - abort when collisions occur (default)
# `"a wins"` - assume a's items to be more up-to-date # `"a wins"` - assume a's items to be more up-to-date
@ -54,6 +60,9 @@ a = bob_calendar_local
b = bob_calendar_remote b = bob_calendar_remote
collections = ["private", "work"] collections = ["private", "work"]
# Calendars also have a color property
metadata = ["displayname", "color"]
[storage bob_calendar_local] [storage bob_calendar_local]
type = filesystem type = filesystem
path = ~/.calendars/ path = ~/.calendars/

View file

@ -62,10 +62,11 @@ Pair Section
- ``a`` and ``b`` reference the storages to sync by their names. - ``a`` and ``b`` reference the storages to sync by their names.
- ``collections``: Optional, a list of collections to synchronize. If this - ``collections``: Optional, a list of collections to synchronize when
parameter is omitted, it is assumed the storages are already directly ``vdirsyncer sync`` is executed. If this parameter is omitted, it is assumed
pointing to one collection each. Specifying a collection multiple times won't the storages are already directly pointing to one collection each. Specifying
make vdirsyncer sync that collection more than once. a collection multiple times won't make vdirsyncer sync that collection more
than once.
Furthermore, there are the special values ``"from a"`` and ``"from b"``, Furthermore, there are the special values ``"from a"`` and ``"from b"``,
which tell vdirsyncer to try autodiscovery on a specific storage. which tell vdirsyncer to try autodiscovery on a specific storage.
@ -88,6 +89,14 @@ Pair Section
Vdirsyncer will not attempt to merge the two items. Vdirsyncer will not attempt to merge the two items.
- ``null``, the default, where an error is shown and no changes are done. - ``null``, the default, where an error is shown and no changes are done.
- ``metadata``: Metadata keys that should be synchronized when ``vdirsyncer
metasync`` is executed. Example::
metadata = ["color", "displayname"]
This synchronizes the ``color`` and the ``displayname`` properties. The
``conflict_resolution`` parameter applies here as well.
.. _storage_config: .. _storage_config:
Storage Section Storage Section

View file

@ -43,10 +43,28 @@ the client, which are free to choose a different scheme for filenames instead.
.. _CardDAV: http://tools.ietf.org/html/rfc6352 .. _CardDAV: http://tools.ietf.org/html/rfc6352
.. _CalDAV: http://tools.ietf.org/search/rfc4791 .. _CalDAV: http://tools.ietf.org/search/rfc4791
Metadata
========
Any of the metadata files described below may be absent. None of them has a
file extension.
- A file called ``color`` inside the vdir indicates the vdir's color, a
property that is only relevant in UI design.
Its content is an ASCII-encoded hex-RGB value of the form ``#RRGGBB``. For
example, a file content of ``#FF0000`` indicates that the vdir has a red
(user-visible) color. No short forms or informal values such as ``red`` (as
known from CSS, for example) are allowed. The prefixing ``#`` must be
present.
- A file called ``displayname`` contains a UTF-8 encoded label that may be used
to represent the vdir in UIs.
Writing to vdirs Writing to vdirs
================ ================
Creating and modifying items *should* happen atomically_. Creating and modifying items or metadata files *should* happen atomically_.
Writing to a temporary file on the same physical device, and then moving it to Writing to a temporary file on the same physical device, and then moving it to
the appropriate location is usually a very effective solution. For this the appropriate location is usually a very effective solution. For this

View file

@ -21,6 +21,7 @@ def format_item(item_template, uid=None):
class StorageTests(object): class StorageTests(object):
storage_class = None storage_class = None
supports_collections = True supports_collections = True
supports_metadata = True
@pytest.fixture(params=['VEVENT', 'VTODO', 'VCARD']) @pytest.fixture(params=['VEVENT', 'VTODO', 'VCARD'])
def item_type(self, request): def item_type(self, request):
@ -55,6 +56,11 @@ class StorageTests(object):
if not self.supports_collections: if not self.supports_collections:
pytest.skip('This storage does not support collections.') pytest.skip('This storage does not support collections.')
@pytest.fixture
def requires_metadata(self):
    '''Skip the requesting test if the storage under test declares
    ``supports_metadata`` as false.'''
    if self.supports_metadata:
        return
    pytest.skip('This storage does not support metadata.')
def test_generic(self, s, get_item): def test_generic(self, s, get_item):
items = [get_item() for i in range(1, 10)] items = [get_item() for i in range(1, 10)]
hrefs = [] hrefs = []
@ -227,3 +233,15 @@ class StorageTests(object):
s = self.storage_class(**get_storage_args(collection=collname)) s = self.storage_class(**get_storage_args(collection=collname))
href, etag = s.upload(get_item()) href, etag = s.upload(get_item())
s.get(href) s.get(href)
def test_metadata(self, requires_metadata, s):
    '''Round-trip the ``color`` and ``displayname`` metadata keys.

    A storage may reject ``color`` with ``UnsupportedMetadataError``; that
    is tolerated. ``displayname`` has to work.
    '''
    color_key, name_key = 'color', 'displayname'
    red = u'#ff0000'
    label = u'hello world'

    try:
        s.set_meta(color_key, None)
        assert s.get_meta(color_key) is None
        s.set_meta(color_key, red)
        assert s.get_meta(color_key) == red
    except exceptions.UnsupportedMetadataError:
        # Color support is optional for a storage.
        pass

    s.set_meta(name_key, label)
    assert s.get_meta(name_key) == label

View file

@ -43,6 +43,7 @@ class CombinedStorage(Storage):
class TestHttpStorage(StorageTests): class TestHttpStorage(StorageTests):
storage_class = CombinedStorage storage_class = CombinedStorage
supports_collections = False supports_collections = False
supports_metadata = False
@pytest.fixture(autouse=True) @pytest.fixture(autouse=True)
def setup_tmpdir(self, tmpdir, monkeypatch): def setup_tmpdir(self, tmpdir, monkeypatch):

View file

@ -11,6 +11,7 @@ class TestSingleFileStorage(StorageTests):
storage_class = SingleFileStorage storage_class = SingleFileStorage
supports_collections = False supports_collections = False
supports_metadata = False
@pytest.fixture(autouse=True) @pytest.fixture(autouse=True)
def setup(self, tmpdir): def setup(self, tmpdir):

77
tests/test_metasync.py Normal file
View file

@ -0,0 +1,77 @@
# -*- coding: utf-8 -*-
import pytest
import vdirsyncer.exceptions as exceptions
from vdirsyncer.storage.base import Item
from vdirsyncer.storage.memory import MemoryStorage
from vdirsyncer.metasync import metasync, MetaSyncConflict
from . import assert_item_equals, blow_up, normalize_item
def test_irrelevant_status():
    '''Status entries for keys that are not being synced are dropped.'''
    storage_a, storage_b = MemoryStorage(), MemoryStorage()
    stale_status = {'foo': 'bar'}

    metasync(storage_a, storage_b, stale_status, keys=())

    assert not stale_status
def test_basic():
    '''A change on one side is copied to the other, including resets to
    ``None``.'''
    left = MemoryStorage()
    right = MemoryStorage()
    state = {}

    def run():
        metasync(left, right, state, keys=['foo'])

    # Initial value on the left side.
    left.set_meta('foo', 'bar')
    run()
    assert left.get_meta('foo') == right.get_meta('foo') == 'bar'

    # Updated value on the left side.
    left.set_meta('foo', 'baz')
    run()
    assert left.get_meta('foo') == right.get_meta('foo') == 'baz'

    # Value cleared on the right side.
    right.set_meta('foo', None)
    run()
    assert left.get_meta('foo') is right.get_meta('foo') is None
def test_conflict():
    '''Differing values without a resolution mode raise and change
    nothing.'''
    left, right = MemoryStorage(), MemoryStorage()
    state = {}
    left.set_meta('foo', 'bar')
    right.set_meta('foo', 'baz')

    with pytest.raises(MetaSyncConflict):
        metasync(left, right, state, keys=['foo'])

    # Neither storage nor the status may have been touched.
    assert left.get_meta('foo') == 'bar'
    assert right.get_meta('foo') == 'baz'
    assert not state
def test_conflict_same_content():
    '''Both sides changed to the same value: no conflict, status is
    updated.'''
    left, right = MemoryStorage(), MemoryStorage()
    state = {}
    left.set_meta('foo', 'bar')
    right.set_meta('foo', 'bar')

    metasync(left, right, state, keys=['foo'])

    assert (
        left.get_meta('foo') == right.get_meta('foo') == state['foo'] == 'bar'
    )
@pytest.mark.parametrize('wins', 'ab')
def test_conflict_x_wins(wins):
    '''With "a wins" / "b wins" the winning side's value ends up in both
    storages and in the status.'''
    left, right = MemoryStorage(), MemoryStorage()
    state = {}
    left.set_meta('foo', 'bar')
    right.set_meta('foo', 'baz')

    if wins == 'a':
        resolution, expected = 'a wins', 'bar'
    else:
        resolution, expected = 'b wins', 'baz'

    metasync(left, right, state, keys=['foo'],
             conflict_resolution=resolution)

    assert (
        left.get_meta('foo') == right.get_meta('foo') == state['foo'] ==
        expected
    )

View file

@ -209,9 +209,6 @@ def test_get_class_init_args_on_storage():
assert not required assert not required
@pytest.mark.skipif((not utils.compat.PY2
or platform.python_implementation() == 'PyPy'),
reason='https://github.com/shazow/urllib3/issues/529')
def test_request_ssl(httpsserver): def test_request_ssl(httpsserver):
sha1 = '94:FD:7A:CB:50:75:A4:69:82:0A:F8:23:DF:07:FC:69:3E:CD:90:CA' sha1 = '94:FD:7A:CB:50:75:A4:69:82:0A:F8:23:DF:07:FC:69:3E:CD:90:CA'
md5 = '19:90:F7:23:94:F2:EF:AB:2B:64:2D:57:3D:25:95:2D' md5 = '19:90:F7:23:94:F2:EF:AB:2B:64:2D:57:3D:25:95:2D'

View file

@ -105,14 +105,19 @@ def sync(pairs, force_delete, max_workers):
Synchronize the given pairs. If no arguments are given, all will be Synchronize the given pairs. If no arguments are given, all will be
synchronized. synchronized.
This command will not synchronize metadata, use `vdirsyncer metasync` for
that.
Examples:
`vdirsyncer sync` will sync everything configured. `vdirsyncer sync` will sync everything configured.
`vdirsyncer sync bob frank` will sync the pairs "bob" and "frank". `vdirsyncer sync bob frank` will sync the pairs "bob" and "frank".
`vdirsyncer sync bob/first_collection` will sync "first_collection" from `vdirsyncer sync bob/first_collection` will sync "first_collection"
the pair "bob". from the pair "bob".
''' '''
from .tasks import sync_pair from .tasks import prepare_pair, sync_collection
from .utils import parse_pairs_args, WorkerQueue from .utils import parse_pairs_args, WorkerQueue
general, all_pairs, all_storages = ctx.obj['config'] general, all_pairs, all_storages = ctx.obj['config']
@ -120,11 +125,39 @@ def sync(pairs, force_delete, max_workers):
for pair_name, collections in parse_pairs_args(pairs, all_pairs): for pair_name, collections in parse_pairs_args(pairs, all_pairs):
wq.spawn_worker() wq.spawn_worker()
wq.put(functools.partial(sync_pair, pair_name=pair_name, wq.put(functools.partial(prepare_pair, pair_name=pair_name,
collections_to_sync=collections, collections=collections,
general=general, all_pairs=all_pairs, general=general, all_pairs=all_pairs,
all_storages=all_storages, all_storages=all_storages,
force_delete=force_delete)) force_delete=force_delete,
callback=sync_collection))
wq.join()
@app.command()
@click.argument('pairs', nargs=-1)
@max_workers_option
@catch_errors
def metasync(pairs, max_workers):
    '''
    Synchronize metadata of the given pairs.
    See the `sync` command regarding the PAIRS argument.
    '''
    from .tasks import prepare_pair, metasync_collection
    from .utils import parse_pairs_args, WorkerQueue

    general, all_pairs, all_storages = ctx.obj['config']
    queue = WorkerQueue(max_workers)

    # One worker and one preparation job per requested pair; prepare_pair is
    # given metasync_collection as the per-collection callback.
    for pair_name, collections in parse_pairs_args(pairs, all_pairs):
        queue.spawn_worker()
        job = functools.partial(
            prepare_pair,
            pair_name=pair_name,
            collections=collections,
            general=general,
            all_pairs=all_pairs,
            all_storages=all_storages,
            callback=metasync_collection,
        )
        queue.put(job)

    queue.join()

View file

@ -10,8 +10,8 @@ from .utils import CliError, JobFailed, cli_logger, collections_for_pair, \
from ..sync import sync from ..sync import sync
def sync_pair(wq, pair_name, collections_to_sync, general, all_pairs, def prepare_pair(wq, pair_name, collections, general, all_pairs, all_storages,
all_storages, force_delete): callback, **kwargs):
a_name, b_name, pair_options = all_pairs[pair_name] a_name, b_name, pair_options = all_pairs[pair_name]
try: try:
@ -28,7 +28,7 @@ def sync_pair(wq, pair_name, collections_to_sync, general, all_pairs,
# spawn one worker less because we can reuse the current one # spawn one worker less because we can reuse the current one
new_workers = -1 new_workers = -1
for collection in (collections_to_sync or all_collections): for collection in (collections or all_collections):
try: try:
config_a, config_b = all_collections[collection] config_a, config_b = all_collections[collection]
except KeyError: except KeyError:
@ -37,9 +37,9 @@ def sync_pair(wq, pair_name, collections_to_sync, general, all_pairs,
pair_name, collection, list(all_collections))) pair_name, collection, list(all_collections)))
new_workers += 1 new_workers += 1
wq.put(functools.partial( wq.put(functools.partial(
sync_collection, pair_name=pair_name, collection=collection, callback, pair_name=pair_name, collection=collection,
config_a=config_a, config_b=config_b, pair_options=pair_options, config_a=config_a, config_b=config_b, pair_options=pair_options,
general=general, force_delete=force_delete general=general, **kwargs
)) ))
for i in range(new_workers): for i in range(new_workers):
@ -107,3 +107,30 @@ def repair_collection(general, all_pairs, all_storages, collection):
cli_logger.info('Repairing {}/{}'.format(storage_name, collection)) cli_logger.info('Repairing {}/{}'.format(storage_name, collection))
cli_logger.warning('Make sure no other program is talking to the server.') cli_logger.warning('Make sure no other program is talking to the server.')
repair_storage(storage) repair_storage(storage)
def metasync_collection(wq, pair_name, collection, config_a, config_b,
                        pair_options, general):
    '''Synchronize the metadata of one collection of a pair.

    Loads the stored metadata status, instantiates both storages, runs
    ``metasync`` with the pair's ``conflict_resolution`` and ``metadata``
    options, and saves the status afterwards.

    :raises JobFailed: If anything goes wrong; the error is first passed to
        ``handle_cli_error``. The status is not saved in that case.
    '''
    from ..metasync import metasync

    status_name = get_status_name(pair_name, collection)

    try:
        cli_logger.info('Metasyncing {}'.format(status_name))

        status_path = general['status_path']
        status = load_status(status_path, pair_name, collection,
                             data_type='metadata') or {}

        storage_a = storage_instance_from_config(config_a)
        storage_b = storage_instance_from_config(config_b)

        resolution = pair_options.get('conflict_resolution')
        keys = pair_options.get('metadata') or ()

        metasync(storage_a, storage_b, status,
                 conflict_resolution=resolution, keys=keys)
    except:
        # Deliberately bare: every failure is handed to handle_cli_error
        # and then converted into JobFailed.
        handle_cli_error(status_name)
        raise JobFailed()

    save_status(status_path, pair_name, collection,
                data_type='metadata', data=status)

View file

@ -56,3 +56,7 @@ class ReadOnlyError(Error):
class InvalidResponse(Error, ValueError): class InvalidResponse(Error, ValueError):
'''The backend returned an invalid result.''' '''The backend returned an invalid result.'''
class UnsupportedMetadataError(Error, NotImplementedError):
    '''The storage doesn't support this type of metadata.

    Also derives from ``NotImplementedError``, so handlers for that built-in
    exception catch it as well.
    '''

54
vdirsyncer/metasync.py Normal file
View file

@ -0,0 +1,54 @@
from . import exceptions, log
logger = log.get(__name__)
class MetaSyncError(exceptions.Error):
    '''Base class for errors specific to metadata synchronization.'''
    pass
class MetaSyncConflict(MetaSyncError):
    '''Raised by ``metasync`` when both storages differ from the stored
    status and from each other, and no conflict resolution is configured.'''
    # The metadata key in conflict. ``metasync`` raises this exception with
    # ``key=...``; presumably the base ``Error`` class stores that keyword
    # on the instance -- TODO confirm against ``exceptions.Error``.
    key = None
def metasync(storage_a, storage_b, status, keys, conflict_resolution=None):
    '''Synchronize metadata values between two storages.

    For every key the values of both storages are compared with the value
    recorded in ``status`` during the previous run. A value that changed on
    only one side is copied to the other side. If both sides changed, the
    conflict is resolved according to ``conflict_resolution``.

    :param storage_a: First storage; must provide ``get_meta`` and
        ``set_meta``.
    :param storage_b: Second storage; same requirements.
    :param status: Dict mapping metadata keys to the last synchronized
        value. Modified in place; entries for keys not in ``keys`` are
        removed.
    :param keys: Iterable of metadata keys to synchronize.
    :param conflict_resolution: ``None`` (raise on conflict), ``'a wins'``
        or ``'b wins'``.
    :raises MetaSyncConflict: If both sides changed to different values and
        ``conflict_resolution`` is ``None``.
    :raises exceptions.UserError: If a conflict occurs and
        ``conflict_resolution`` is not one of the supported values.
    '''
    # ``keys`` is traversed twice (sync loop and status cleanup), so a
    # one-shot iterable must be materialized first.
    keys = tuple(keys)

    def _copy(key, value, target):
        # Write ``value`` to ``target`` and record it as the synced state.
        logger.info(u'Copying {} to {}'.format(key, target))
        target.set_meta(key, value)
        status[key] = value

    def _resolve_conflict(key, a, b):
        if a == b:
            # Both sides changed to the same value; only record it.
            status[key] = a
        elif conflict_resolution is None:
            raise MetaSyncConflict(key=key)
        elif conflict_resolution == 'a wins':
            _copy(key, a, storage_b)
        elif conflict_resolution == 'b wins':
            _copy(key, b, storage_a)
        else:
            # Do not silently leave the conflict unresolved.
            raise exceptions.UserError(
                'Invalid conflict resolution mode: {!r}'
                .format(conflict_resolution)
            )

    for key in keys:
        a = storage_a.get_meta(key)
        b = storage_b.get_meta(key)
        s = status.get(key)
        logger.debug(u'Key: {}'.format(key))
        logger.debug(u'A: {}'.format(a))
        logger.debug(u'B: {}'.format(b))
        logger.debug(u'S: {}'.format(s))

        if a != s and b != s:
            _resolve_conflict(key, a, b)
        elif a != s and b == s:
            _copy(key, a, storage_b)
        elif a == s and b != s:
            _copy(key, b, storage_a)
        else:
            assert a == b

    # Forget the state of keys that are no longer synchronized.
    for key in set(status) - set(keys):
        del status[key]

View file

@ -208,3 +208,25 @@ class Storage(with_metaclass(StorageMeta)):
when. when.
''' '''
yield yield
def get_meta(self, key):
    '''Get metadata value for collection/storage.

    See the vdir specification for the keys that *have* to be accepted.

    This base implementation always raises; storages with metadata support
    override it.

    :param key: The metadata key.
    :type key: unicode
    :raises NotImplementedError: Always, in this base implementation.
    '''
    raise NotImplementedError('This storage does not support metadata.')
def set_meta(self, key, value):
    '''Set metadata value for collection/storage.

    This base implementation always raises; storages with metadata support
    override it.

    :param key: The metadata key.
    :type key: unicode
    :param value: The value.
    :type value: unicode
    :raises NotImplementedError: Always, in this base implementation.
    '''
    raise NotImplementedError('This storage does not support metadata.')

View file

@ -336,6 +336,10 @@ class DavStorage(Storage):
_session = None _session = None
_repr_attributes = ('username', 'url') _repr_attributes = ('username', 'url')
_property_table = {
'displayname': ('displayname', 'DAV:'),
}
def __init__(self, url, username='', password='', verify=True, auth=None, def __init__(self, url, username='', password='', verify=True, auth=None,
useragent=USERAGENT, unsafe_href_chars='@', useragent=USERAGENT, unsafe_href_chars='@',
verify_fingerprint=None, auth_cert=None, **kwargs): verify_fingerprint=None, auth_cert=None, **kwargs):
@ -546,6 +550,69 @@ class DavStorage(Storage):
for href, etag, prop in rv: for href, etag, prop in rv:
yield utils.compat.urlunquote(href), etag yield utils.compat.urlunquote(href), etag
def get_meta(self, key):
    '''Fetch a metadata value via a ``PROPFIND`` request.

    :param key: The metadata key; must be present in ``_property_table``.
    :returns: The text of the first matching property element in the
        response that has non-empty text, or ``None`` if there is none.
    :raises exceptions.UnsupportedMetadataError: If ``key`` is not in
        ``_property_table``.
    '''
    try:
        tagname, namespace = self._property_table[key]
    except KeyError:
        raise exceptions.UnsupportedMetadataError()

    # Clark notation ("{namespace}tag") as used by lxml.
    lxml_selector = '{%s}%s' % (namespace, tagname)
    data = '''<?xml version="1.0" encoding="utf-8" ?>
        <D:propfind xmlns:D="DAV:">
            <D:prop>
                {}
            </D:prop>
        </D:propfind>
    '''.format(
        to_native(etree.tostring(etree.Element(lxml_selector)))
    )

    headers = self.session.get_default_headers()
    # Header values have to be strings; an integer is rejected by current
    # versions of requests.
    headers['Depth'] = '0'

    response = self.session.request(
        'PROPFIND', '',
        data=data, headers=headers
    )

    root = _parse_xml(response.content)

    for prop in root.findall('.//' + lxml_selector):
        text = getattr(prop, 'text', None)
        if text:
            return text

    # Property absent or empty.
    return None
def set_meta(self, key, value):
    '''Set a metadata value via a ``PROPPATCH`` request.

    :param key: The metadata key; must be present in ``_property_table``.
    :param value: The value to use as the property element's text.
    :raises exceptions.UnsupportedMetadataError: If ``key`` is not in
        ``_property_table``.
    '''
    try:
        tagname, namespace = self._property_table[key]
    except KeyError:
        raise exceptions.UnsupportedMetadataError()

    # Clark notation ("{namespace}tag") as used by lxml.
    prop = etree.Element('{%s}%s' % (namespace, tagname))
    prop.text = value

    body = '''<?xml version="1.0" encoding="utf-8" ?>
        <D:propertyupdate xmlns:D="DAV:">
            <D:set>
                <D:prop>
                    {}
                </D:prop>
            </D:set>
        </D:propertyupdate>
    '''.format(to_native(etree.tostring(prop)))

    self.session.request(
        'PROPPATCH', '',
        data=body, headers=self.session.get_default_headers()
    )

    # XXX: The response content is currently ignored. Exceptions are raised
    # for HTTP errors, but a multistatus response with error codes inside
    # is not parsed yet. Not sure how common those are, or what they look
    # like. It might be easier (and safer in case of a stupid server) to
    # just issue a PROPFIND to see whether the value actually got set.
class CaldavStorage(DavStorage): class CaldavStorage(DavStorage):
@ -598,6 +665,11 @@ class CaldavStorage(DavStorage):
get_multi_data_query = '{urn:ietf:params:xml:ns:caldav}calendar-data' get_multi_data_query = '{urn:ietf:params:xml:ns:caldav}calendar-data'
_property_table = dict(DavStorage._property_table)
_property_table.update({
'color': ('calendar-color', 'http://apple.com/ns/ical/'),
})
def __init__(self, start_date=None, end_date=None, def __init__(self, start_date=None, end_date=None,
item_types=(), **kwargs): item_types=(), **kwargs):
super(CaldavStorage, self).__init__(**kwargs) super(CaldavStorage, self).__init__(**kwargs)

View file

@ -180,3 +180,21 @@ class FilesystemStorage(Storage):
subprocess.call([self.post_hook, fpath]) subprocess.call([self.post_hook, fpath])
except OSError as e: except OSError as e:
logger.warning('Error executing external hook: {}'.format(str(e))) logger.warning('Error executing external hook: {}'.format(str(e)))
def get_meta(self, key):
    '''Read the metadata file named ``key`` inside ``self.path``.

    :returns: The file content decoded with ``self.encoding``, or ``None``
        if the file does not exist or is empty.
    '''
    meta_path = os.path.join(self.path, key)
    try:
        with open(meta_path, 'rb') as f:
            raw = f.read()
    except IOError as e:
        # Only a missing file means "no value"; anything else is an error.
        if e.errno != errno.ENOENT:
            raise
        return None
    return raw.decode(self.encoding) or None
def set_meta(self, key, value):
    '''Atomically write ``value`` to the metadata file named ``key`` inside
    ``self.path``, encoded with ``self.encoding``.

    A false value such as ``None`` is written as an empty file.
    '''
    text = value or u''
    assert isinstance(text, text_type)

    target = os.path.join(self.path, key)
    with atomic_write(target, mode='wb', overwrite=True) as f:
        f.write(text.encode(self.encoding))

View file

@ -21,6 +21,7 @@ class MemoryStorage(Storage):
raise exceptions.UserError('MemoryStorage does not support ' raise exceptions.UserError('MemoryStorage does not support '
'collections.') 'collections.')
self.items = {} # href => (etag, item) self.items = {} # href => (etag, item)
self.metadata = {}
self.fileext = fileext self.fileext = fileext
super(MemoryStorage, self).__init__(**kwargs) super(MemoryStorage, self).__init__(**kwargs)
@ -63,3 +64,9 @@ class MemoryStorage(Storage):
if etag != self.items[href][0]: if etag != self.items[href][0]:
raise exceptions.WrongEtagError(etag) raise exceptions.WrongEtagError(etag)
del self.items[href] del self.items[href]
def get_meta(self, key):
    '''Return the value stored for ``key``, or ``None`` if there is none.'''
    try:
        return self.metadata[key]
    except KeyError:
        return None
def set_meta(self, key, value):
    '''Store ``value`` under ``key``, replacing any previous value.'''
    self.metadata.update({key: value})