Merge pull request #227 from untitaker/metadata

Metadata
This commit is contained in:
Markus Unterwaditzer 2015-07-07 16:36:07 +02:00
commit c84cfc0a29
16 changed files with 389 additions and 22 deletions

View file

@ -1,5 +1,8 @@
# An example configuration for vdirsyncer.
#
# Move it to ~/.vdirsyncer/config or ~/.config/vdirsyncer/config and edit it.
# Run `vdirsyncer --help` for CLI usage.
#
# Optional parameters are commented out.
# This file doesn't document all available parameters, see
# http://vdirsyncer.readthedocs.org/ for the rest of them.
@ -27,6 +30,9 @@ b = bob_contacts_remote
collections = ["from b"]
# Synchronize the "display name" property into a local file (~/.contacts/displayname).
metadata = ["displayname"]
# To resolve a conflict the following values are possible:
# `null` - abort when collisions occur (default)
# `"a wins"` - assume a's items to be more up-to-date
@ -54,6 +60,9 @@ a = bob_calendar_local
b = bob_calendar_remote
collections = ["private", "work"]
# Calendars also have a color property
metadata = ["displayname", "color"]
[storage bob_calendar_local]
type = filesystem
path = ~/.calendars/

View file

@ -62,10 +62,11 @@ Pair Section
- ``a`` and ``b`` reference the storages to sync by their names.
- ``collections``: Optional, a list of collections to synchronize. If this
parameter is omitted, it is assumed the storages are already directly
pointing to one collection each. Specifying a collection multiple times won't
make vdirsyncer sync that collection more than once.
- ``collections``: Optional, a list of collections to synchronize when
``vdirsyncer sync`` is executed. If this parameter is omitted, it is assumed
the storages are already directly pointing to one collection each. Specifying
a collection multiple times won't make vdirsyncer sync that collection more
than once.
Furthermore, there are the special values ``"from a"`` and ``"from b"``,
which tell vdirsyncer to try autodiscovery on a specific storage.
@ -88,6 +89,14 @@ Pair Section
Vdirsyncer will not attempt to merge the two items.
- ``null``, the default, where an error is shown and no changes are done.
- ``metadata``: Metadata keys that should be synchronized when ``vdirsyncer
metasync`` is executed. Example::
metadata = ["color", "displayname"]
This synchronizes the ``color`` and the ``displayname`` properties. The
``conflict_resolution`` parameter applies here as well.
.. _storage_config:
Storage Section

View file

@ -43,10 +43,28 @@ the client, which are free to choose a different scheme for filenames instead.
.. _CardDAV: http://tools.ietf.org/html/rfc6352
.. _CalDAV: http://tools.ietf.org/search/rfc4791
Metadata
========
Any of the metadata files listed below may be absent. None of them has a file
extension.
- A file called ``color`` inside the vdir indicates the vdir's color, a
property that is only relevant in UI design.
Its content is an ASCII-encoded hex-RGB value of the form ``#RRGGBB``. For
example, a file content of ``#FF0000`` indicates that the vdir has a red
(user-visible) color. No short forms or informal values such as ``red`` (as
known from CSS, for example) are allowed. The prefixing ``#`` must be
present.
- A file called ``displayname`` contains a UTF-8 encoded label that may be used
to represent the vdir in UIs.
Writing to vdirs
================
Creating and modifying items *should* happen atomically_.
Creating and modifying items or metadata files *should* happen atomically_.
Writing to a temporary file on the same physical device, and then moving it to
the appropriate location is usually a very effective solution. For this

View file

@ -21,6 +21,7 @@ def format_item(item_template, uid=None):
class StorageTests(object):
storage_class = None
supports_collections = True
supports_metadata = True
@pytest.fixture(params=['VEVENT', 'VTODO', 'VCARD'])
def item_type(self, request):
@ -55,6 +56,11 @@ class StorageTests(object):
if not self.supports_collections:
pytest.skip('This storage does not support collections.')
@pytest.fixture
def requires_metadata(self):
    '''Skip the requesting test if the storage class lacks metadata support.

    The decision is taken from the ``supports_metadata`` attribute of the
    test class.
    '''
    if self.supports_metadata:
        return
    pytest.skip('This storage does not support metadata.')
def test_generic(self, s, get_item):
items = [get_item() for i in range(1, 10)]
hrefs = []
@ -227,3 +233,15 @@ class StorageTests(object):
s = self.storage_class(**get_storage_args(collection=collname))
href, etag = s.upload(get_item())
s.get(href)
def test_metadata(self, requires_metadata, s):
    '''Round-trip metadata values through ``set_meta``/``get_meta``.'''
    color_key = 'color'
    red = u'#ff0000'

    # A storage may reject ``color`` with UnsupportedMetadataError; in that
    # case the remaining color checks are skipped instead of failing.
    try:
        s.set_meta(color_key, None)
        cleared = s.get_meta(color_key)
        assert cleared is None

        s.set_meta(color_key, red)
        stored = s.get_meta(color_key)
        assert stored == red
    except exceptions.UnsupportedMetadataError:
        pass

    # ``displayname`` is checked unconditionally.
    label = u'hello world'
    s.set_meta('displayname', label)
    assert s.get_meta('displayname') == label

View file

@ -43,6 +43,7 @@ class CombinedStorage(Storage):
class TestHttpStorage(StorageTests):
storage_class = CombinedStorage
supports_collections = False
supports_metadata = False
@pytest.fixture(autouse=True)
def setup_tmpdir(self, tmpdir, monkeypatch):

View file

@ -11,6 +11,7 @@ class TestSingleFileStorage(StorageTests):
storage_class = SingleFileStorage
supports_collections = False
supports_metadata = False
@pytest.fixture(autouse=True)
def setup(self, tmpdir):

77
tests/test_metasync.py Normal file
View file

@ -0,0 +1,77 @@
# -*- coding: utf-8 -*-
import pytest
import vdirsyncer.exceptions as exceptions
from vdirsyncer.storage.base import Item
from vdirsyncer.storage.memory import MemoryStorage
from vdirsyncer.metasync import metasync, MetaSyncConflict
from . import assert_item_equals, blow_up, normalize_item
def test_irrelevant_status():
    '''Status entries for keys that are not synced are dropped.'''
    stale_status = {'foo': 'bar'}
    left, right = MemoryStorage(), MemoryStorage()

    metasync(left, right, stale_status, keys=())

    assert not stale_status
def test_basic():
    '''Changes on either side propagate to the other, including removal.'''
    left, right = MemoryStorage(), MemoryStorage()
    status = {}

    def sync():
        metasync(left, right, status, keys=['foo'])

    # Initial value on the left side is copied to the right.
    left.set_meta('foo', 'bar')
    sync()
    assert left.get_meta('foo') == right.get_meta('foo') == 'bar'

    # A later change on the left side is copied as well.
    left.set_meta('foo', 'baz')
    sync()
    assert left.get_meta('foo') == right.get_meta('foo') == 'baz'

    # Clearing the value on the right side clears it on the left.
    right.set_meta('foo', None)
    sync()
    assert left.get_meta('foo') is right.get_meta('foo') is None
def test_conflict():
    '''Diverging values without a resolution raise and change nothing.'''
    left, right = MemoryStorage(), MemoryStorage()
    status = {}
    left.set_meta('foo', 'bar')
    right.set_meta('foo', 'baz')

    with pytest.raises(MetaSyncConflict):
        metasync(left, right, status, keys=['foo'])

    # Neither storage nor the status may have been touched.
    assert left.get_meta('foo') == 'bar'
    assert right.get_meta('foo') == 'baz'
    assert not status
def test_conflict_same_content():
    '''Both sides changing to the same value is not a conflict.'''
    left, right = MemoryStorage(), MemoryStorage()
    status = {}
    for storage in (left, right):
        storage.set_meta('foo', 'bar')

    metasync(left, right, status, keys=['foo'])

    assert (
        left.get_meta('foo') == right.get_meta('foo') == status['foo'] == 'bar'
    )
@pytest.mark.parametrize('wins', 'ab')
def test_conflict_x_wins(wins):
    '''With "a wins"/"b wins" the winning side's value ends up everywhere.'''
    if wins == 'a':
        resolution, expected = 'a wins', 'bar'
    else:
        resolution, expected = 'b wins', 'baz'

    left, right = MemoryStorage(), MemoryStorage()
    status = {}
    left.set_meta('foo', 'bar')
    right.set_meta('foo', 'baz')

    metasync(left, right, status, keys=['foo'],
             conflict_resolution=resolution)

    assert (
        left.get_meta('foo') == right.get_meta('foo') == status['foo'] ==
        expected
    )

View file

@ -209,9 +209,6 @@ def test_get_class_init_args_on_storage():
assert not required
@pytest.mark.skipif((not utils.compat.PY2
or platform.python_implementation() == 'PyPy'),
reason='https://github.com/shazow/urllib3/issues/529')
def test_request_ssl(httpsserver):
sha1 = '94:FD:7A:CB:50:75:A4:69:82:0A:F8:23:DF:07:FC:69:3E:CD:90:CA'
md5 = '19:90:F7:23:94:F2:EF:AB:2B:64:2D:57:3D:25:95:2D'

View file

@ -105,14 +105,19 @@ def sync(pairs, force_delete, max_workers):
Synchronize the given pairs. If no arguments are given, all will be
synchronized.
`vdirsyncer sync` will sync everything configured.
This command will not synchronize metadata, use `vdirsyncer metasync` for
that.
`vdirsyncer sync bob frank` will sync the pairs "bob" and "frank".
Examples:
`vdirsyncer sync bob/first_collection` will sync "first_collection" from
the pair "bob".
`vdirsyncer sync` will sync everything configured.
`vdirsyncer sync bob frank` will sync the pairs "bob" and "frank".
`vdirsyncer sync bob/first_collection` will sync "first_collection"
from the pair "bob".
'''
from .tasks import sync_pair
from .tasks import prepare_pair, sync_collection
from .utils import parse_pairs_args, WorkerQueue
general, all_pairs, all_storages = ctx.obj['config']
@ -120,11 +125,39 @@ def sync(pairs, force_delete, max_workers):
for pair_name, collections in parse_pairs_args(pairs, all_pairs):
wq.spawn_worker()
wq.put(functools.partial(sync_pair, pair_name=pair_name,
collections_to_sync=collections,
wq.put(functools.partial(prepare_pair, pair_name=pair_name,
collections=collections,
general=general, all_pairs=all_pairs,
all_storages=all_storages,
force_delete=force_delete))
force_delete=force_delete,
callback=sync_collection))
wq.join()
@app.command()
@click.argument('pairs', nargs=-1)
@max_workers_option
@catch_errors
def metasync(pairs, max_workers):
    '''
    Synchronize metadata of the given pairs.
    See the `sync` command regarding the PAIRS argument.
    '''
    # Imports are kept local to the command, as in the original.
    from .tasks import prepare_pair, metasync_collection
    from .utils import parse_pairs_args, WorkerQueue
    general, all_pairs, all_storages = ctx.obj['config']

    queue = WorkerQueue(max_workers)

    for pair_name, collections in parse_pairs_args(pairs, all_pairs):
        # One worker and one ``prepare_pair`` job are added per pair;
        # ``metasync_collection`` is handed over as the callback.
        queue.spawn_worker()
        job = functools.partial(
            prepare_pair,
            pair_name=pair_name,
            collections=collections,
            general=general,
            all_pairs=all_pairs,
            all_storages=all_storages,
            callback=metasync_collection,
        )
        queue.put(job)

    queue.join()

View file

@ -10,8 +10,8 @@ from .utils import CliError, JobFailed, cli_logger, collections_for_pair, \
from ..sync import sync
def sync_pair(wq, pair_name, collections_to_sync, general, all_pairs,
all_storages, force_delete):
def prepare_pair(wq, pair_name, collections, general, all_pairs, all_storages,
callback, **kwargs):
a_name, b_name, pair_options = all_pairs[pair_name]
try:
@ -28,7 +28,7 @@ def sync_pair(wq, pair_name, collections_to_sync, general, all_pairs,
# spawn one worker less because we can reuse the current one
new_workers = -1
for collection in (collections_to_sync or all_collections):
for collection in (collections or all_collections):
try:
config_a, config_b = all_collections[collection]
except KeyError:
@ -37,9 +37,9 @@ def sync_pair(wq, pair_name, collections_to_sync, general, all_pairs,
pair_name, collection, list(all_collections)))
new_workers += 1
wq.put(functools.partial(
sync_collection, pair_name=pair_name, collection=collection,
callback, pair_name=pair_name, collection=collection,
config_a=config_a, config_b=config_b, pair_options=pair_options,
general=general, force_delete=force_delete
general=general, **kwargs
))
for i in range(new_workers):
@ -107,3 +107,30 @@ def repair_collection(general, all_pairs, all_storages, collection):
cli_logger.info('Repairing {}/{}'.format(storage_name, collection))
cli_logger.warning('Make sure no other program is talking to the server.')
repair_storage(storage)
def metasync_collection(wq, pair_name, collection, config_a, config_b,
                        pair_options, general):
    '''Synchronize the metadata of one collection of a pair.

    Loads the stored metadata status, instantiates both storages, runs
    ``metasync`` for the keys listed under the pair's ``metadata`` option and
    saves the updated status afterwards.

    :param wq: Not used in this function body.
    :param pair_name: Name of the pair, used for status lookup and logging.
    :param collection: Name of the collection to process.
    :param config_a: Configuration from which storage A is instantiated.
    :param config_b: Configuration from which storage B is instantiated.
    :param pair_options: Pair options; ``conflict_resolution`` and
        ``metadata`` are read from it.
    :param general: General options; ``status_path`` is read from it.
    :raises JobFailed: If anything inside the sync step raised.
    '''
    from ..metasync import metasync

    status_name = get_status_name(pair_name, collection)

    try:
        cli_logger.info('Metasyncing {}'.format(status_name))

        previous = load_status(general['status_path'], pair_name,
                               collection, data_type='metadata')
        # A falsy load result is replaced by a fresh, empty status.
        status = previous or {}

        storage_a = storage_instance_from_config(config_a)
        storage_b = storage_instance_from_config(config_b)

        resolution = pair_options.get('conflict_resolution', None)
        metadata_keys = pair_options.get('metadata', None) or ()
        metasync(storage_a, storage_b, status,
                 conflict_resolution=resolution, keys=metadata_keys)
    except:
        # Every failure is handed to handle_cli_error and then surfaces as
        # JobFailed; the status is not saved in that case.
        handle_cli_error(status_name)
        raise JobFailed()

    save_status(general['status_path'], pair_name, collection,
                data_type='metadata', data=status)

View file

@ -56,3 +56,7 @@ class ReadOnlyError(Error):
class InvalidResponse(Error, ValueError):
'''The backend returned an invalid result.'''
class UnsupportedMetadataError(Error, NotImplementedError):
    '''Raised when a storage cannot handle the requested kind of metadata.

    Because it also derives from ``NotImplementedError``, handlers written
    for that built-in exception catch it as well.
    '''

54
vdirsyncer/metasync.py Normal file
View file

@ -0,0 +1,54 @@
from . import exceptions, log
logger = log.get(__name__)
class MetaSyncError(exceptions.Error):
    '''Base class for errors specific to metadata synchronization.'''
class MetaSyncConflict(MetaSyncError):
    '''A metadata key has conflicting values that could not be resolved.'''

    # Class-level default. Presumably replaced per instance by the ``key=``
    # keyword given when the exception is raised -- verify against
    # ``exceptions.Error``.
    key = None
def metasync(storage_a, storage_b, status, keys, conflict_resolution=None):
    '''Synchronize metadata values between two storages.

    For each key, the current value of both storages is compared with the
    value recorded in ``status``. A value that changed on only one side is
    copied to the other side. If both sides changed, the conflict is handled
    according to ``conflict_resolution``.

    :param storage_a: First storage; must provide ``get_meta``/``set_meta``.
    :param storage_b: Second storage; must provide ``get_meta``/``set_meta``.
    :param status: Dict mapping metadata keys to the value seen after the
        last sync. It is updated in place; entries whose key is not in
        ``keys`` are removed.
    :param keys: Iterable of metadata keys to synchronize.
    :param conflict_resolution: ``None`` (raise on conflict), ``'a wins'``
        or ``'b wins'``.
    :raises MetaSyncConflict: If both sides changed to different values and
        ``conflict_resolution`` is ``None``.
    :raises exceptions.UserError: If a conflict occurs and
        ``conflict_resolution`` is not one of the supported values.
    '''
    # ``keys`` is walked twice (sync loop and status cleanup). Materialize
    # it so that a one-shot iterable does not wipe the status afterwards.
    keys = tuple(keys)

    def _a_to_b(key, value):
        logger.info(u'Copying {} to {}'.format(key, storage_b))
        storage_b.set_meta(key, value)
        status[key] = value

    def _b_to_a(key, value):
        logger.info(u'Copying {} to {}'.format(key, storage_a))
        storage_a.set_meta(key, value)
        status[key] = value

    def _resolve_conflict(key, a, b):
        if a == b:
            # Both sides changed to the same value: just record it.
            status[key] = a
        elif conflict_resolution is None:
            raise MetaSyncConflict(key=key)
        elif conflict_resolution == 'a wins':
            _a_to_b(key, a)
        elif conflict_resolution == 'b wins':
            _b_to_a(key, b)
        else:
            # Previously an unknown mode fell through silently and left the
            # storages diverged without any error.
            raise exceptions.UserError(
                'Invalid conflict resolution mode: {!r}'
                .format(conflict_resolution)
            )

    for key in keys:
        a = storage_a.get_meta(key)
        b = storage_b.get_meta(key)
        s = status.get(key)
        logger.debug(u'Key: {}'.format(key))
        logger.debug(u'A: {}'.format(a))
        logger.debug(u'B: {}'.format(b))
        logger.debug(u'S: {}'.format(s))

        if a != s and b != s:
            _resolve_conflict(key, a, b)
        elif a != s and b == s:
            _a_to_b(key, a)
        elif a == s and b != s:
            _b_to_a(key, b)
        else:
            # Neither side differs from the status, so they must agree.
            assert a == b

    # Forget status entries for keys that are no longer synchronized.
    for key in set(status) - set(keys):
        del status[key]

View file

@ -208,3 +208,25 @@ class Storage(with_metaclass(StorageMeta)):
when.
'''
yield
def get_meta(self, key):
    '''Return the metadata value stored under ``key`` for this
    collection/storage.

    The vdir specification lists the keys that *have* to be accepted.

    :param key: The metadata key.
    :type key: unicode
    :raises NotImplementedError: Always, in this base implementation.
    '''
    message = 'This storage does not support metadata.'
    raise NotImplementedError(message)
def set_meta(self, key, value):
    '''Set metadata value for collection/storage.

    :param key: The metadata key.
    :type key: unicode
    :param value: The value.
    :type value: unicode
    :raises NotImplementedError: Always, in this base implementation.
    '''
    raise NotImplementedError('This storage does not support metadata.')

View file

@ -336,6 +336,10 @@ class DavStorage(Storage):
_session = None
_repr_attributes = ('username', 'url')
_property_table = {
'displayname': ('displayname', 'DAV:'),
}
def __init__(self, url, username='', password='', verify=True, auth=None,
useragent=USERAGENT, unsafe_href_chars='@',
verify_fingerprint=None, auth_cert=None, **kwargs):
@ -546,6 +550,69 @@ class DavStorage(Storage):
for href, etag, prop in rv:
yield utils.compat.urlunquote(href), etag
def get_meta(self, key):
    '''Fetch a single metadata value from the server with ``PROPFIND``.

    :param key: The metadata key; it must have an entry in
        ``_property_table``.
    :returns: The text of the first matching property element in the
        response that has non-empty text, or ``None`` if there is none.
    :raises exceptions.UnsupportedMetadataError: If ``key`` has no entry in
        ``_property_table``.
    '''
    try:
        tagname, namespace = self._property_table[key]
    except KeyError:
        raise exceptions.UnsupportedMetadataError()

    lxml_selector = '{%s}%s' % (namespace, tagname)
    data = '''<?xml version="1.0" encoding="utf-8" ?>
<D:propfind xmlns:D="DAV:">
<D:prop>
{}
</D:prop>
</D:propfind>
'''.format(
        to_native(etree.tostring(etree.Element(lxml_selector)))
    )

    headers = self.session.get_default_headers()
    # Header values must be strings: an integer is rejected by recent
    # versions of requests. ``Depth: 0`` limits the PROPFIND to the
    # requested resource itself.
    headers['Depth'] = '0'

    response = self.session.request(
        'PROPFIND', '',
        data=data, headers=headers
    )

    root = _parse_xml(response.content)

    for prop in root.findall('.//' + lxml_selector):
        text = getattr(prop, 'text', None)
        if text:
            return text

    # No matching property with content was found.
    return None
def set_meta(self, key, value):
    '''Store a single metadata value on the server with ``PROPPATCH``.

    :param key: The metadata key; it must have an entry in
        ``_property_table``.
    :param value: The value, used as the text of the property element.
    :raises exceptions.UnsupportedMetadataError: If ``key`` has no entry in
        ``_property_table``.
    '''
    try:
        tagname, namespace = self._property_table[key]
    except KeyError:
        raise exceptions.UnsupportedMetadataError()

    prop = etree.Element('{%s}%s' % (namespace, tagname))
    prop.text = value
    serialized_prop = to_native(etree.tostring(prop))

    body = '''<?xml version="1.0" encoding="utf-8" ?>
<D:propertyupdate xmlns:D="DAV:">
<D:set>
<D:prop>
{}
</D:prop>
</D:set>
</D:propertyupdate>
'''.format(serialized_prop)

    default_headers = self.session.get_default_headers()
    self.session.request('PROPPATCH', '', data=body, headers=default_headers)

    # XXX: Response content is currently ignored. Though exceptions are
    # raised for HTTP errors, a multistatus with error codes inside is not
    # parsed yet. Not sure how common those are, or what they look like. It
    # might be easier (and safer in case of a stupid server) to just issue
    # a PROPFIND to see if the value actually got set.
class CaldavStorage(DavStorage):
@ -598,6 +665,11 @@ class CaldavStorage(DavStorage):
get_multi_data_query = '{urn:ietf:params:xml:ns:caldav}calendar-data'
_property_table = dict(DavStorage._property_table)
_property_table.update({
'color': ('calendar-color', 'http://apple.com/ns/ical/'),
})
def __init__(self, start_date=None, end_date=None,
item_types=(), **kwargs):
super(CaldavStorage, self).__init__(**kwargs)

View file

@ -180,3 +180,21 @@ class FilesystemStorage(Storage):
subprocess.call([self.post_hook, fpath])
except OSError as e:
logger.warning('Error executing external hook: {}'.format(str(e)))
def get_meta(self, key):
    '''Read the metadata value for ``key`` from a file in the storage path.

    The file is named after the key and decoded with ``self.encoding``.

    :param key: The metadata key, used as the file name.
    :returns: The decoded file content, or ``None`` if the file does not
        exist or is empty.
    :raises IOError: For any I/O failure other than a missing file.
    '''
    meta_path = os.path.join(self.path, key)
    try:
        with open(meta_path, 'rb') as f:
            raw = f.read()
    except IOError as e:
        # Only a missing file means "no value"; everything else is an error.
        if e.errno != errno.ENOENT:
            raise
        return None

    # An empty file is reported as "no value", too.
    return raw.decode(self.encoding) or None
def set_meta(self, key, value):
    '''Write the metadata value for ``key`` to a file in the storage path.

    The file is named after the key, written atomically and encoded with
    ``self.encoding``. Any existing file is overwritten.

    :param key: The metadata key, used as the file name.
    :param value: The text to store; a falsy value results in an empty
        file.
    '''
    text = value if value else u''
    assert isinstance(text, text_type)

    meta_path = os.path.join(self.path, key)
    with atomic_write(meta_path, mode='wb', overwrite=True) as f:
        f.write(text.encode(self.encoding))

View file

@ -21,6 +21,7 @@ class MemoryStorage(Storage):
raise exceptions.UserError('MemoryStorage does not support '
'collections.')
self.items = {} # href => (etag, item)
self.metadata = {}
self.fileext = fileext
super(MemoryStorage, self).__init__(**kwargs)
@ -63,3 +64,9 @@ class MemoryStorage(Storage):
if etag != self.items[href][0]:
raise exceptions.WrongEtagError(etag)
del self.items[href]
def get_meta(self, key):
    '''Return the value stored under ``key`` in ``self.metadata``, or
    ``None`` if there is no such entry.'''
    stored = self.metadata
    return stored.get(key, None)
def set_meta(self, key, value):
    '''Store ``value`` under ``key`` in ``self.metadata``, replacing any
    previous entry.'''
    stored = self.metadata
    stored[key] = value