mirror of
https://github.com/samsonjs/vdirsyncer.git
synced 2026-03-26 09:05:50 +00:00
382 lines
12 KiB
Python
382 lines
12 KiB
Python
# -*- coding: utf-8 -*-
|
|
|
|
import hashlib
|
|
from itertools import chain, tee
|
|
|
|
from .utils import cached_property, uniq
|
|
|
|
|
|
IGNORE_PROPS = (
|
|
# PRODID is changed by radicale for some reason after upload
|
|
'PRODID',
|
|
# Sometimes METHOD:PUBLISH is added by WebCAL providers, for us it doesn't
|
|
# make a difference
|
|
'METHOD',
|
|
# X-RADICALE-NAME is used by radicale, because hrefs don't really exist in
|
|
# their filesystem backend
|
|
'X-RADICALE-NAME',
|
|
# Apparently this is set by Horde?
|
|
# https://github.com/pimutils/vdirsyncer/issues/318
|
|
'X-WR-CALNAME',
|
|
# Those are from the VCARD specification and is supposed to change when the
|
|
# item does -- however, we can determine that ourselves
|
|
'REV',
|
|
'LAST-MODIFIED',
|
|
'CREATED',
|
|
# Some iCalendar HTTP calendars generate the DTSTAMP at request time, so
|
|
# this property always changes when the rest of the item didn't. Some do
|
|
# the same with the UID.
|
|
#
|
|
# - Google's read-only calendar links
|
|
# - http://www.feiertage-oesterreich.at/
|
|
'DTSTAMP',
|
|
'UID',
|
|
)
|
|
|
|
|
|
class Item(object):
|
|
|
|
'''Immutable wrapper class for VCALENDAR (VEVENT, VTODO) and
|
|
VCARD'''
|
|
|
|
def __init__(self, raw):
|
|
assert isinstance(raw, str)
|
|
self._raw = raw
|
|
|
|
def with_uid(self, new_uid):
|
|
parsed = _Component.parse(self.raw)
|
|
stack = [parsed]
|
|
while stack:
|
|
component = stack.pop()
|
|
stack.extend(component.subcomponents)
|
|
|
|
if component.name in ('VEVENT', 'VTODO', 'VJOURNAL', 'VCARD'):
|
|
del component['UID']
|
|
if new_uid:
|
|
component['UID'] = new_uid
|
|
|
|
return Item('\r\n'.join(parsed.dump_lines()))
|
|
|
|
@cached_property
|
|
def raw(self):
|
|
'''Raw content of the item, as unicode string.
|
|
|
|
Vdirsyncer doesn't validate the content in any way.
|
|
'''
|
|
return self._raw
|
|
|
|
@cached_property
|
|
def uid(self):
|
|
'''Global identifier of the item, across storages, doesn't change after
|
|
a modification of the item.'''
|
|
# Don't actually parse component, but treat all lines as single
|
|
# component, avoiding traversal through all subcomponents.
|
|
x = _Component('TEMP', self.raw.splitlines(), [])
|
|
try:
|
|
return x['UID'].strip() or None
|
|
except KeyError:
|
|
return None
|
|
|
|
@cached_property
|
|
def hash(self):
|
|
'''Hash of self.raw, used for etags.'''
|
|
return hash_item(self.raw)
|
|
|
|
@cached_property
|
|
def ident(self):
|
|
'''Used for generating hrefs and matching up items during
|
|
synchronization. This is either the UID or the hash of the item's
|
|
content.'''
|
|
|
|
# We hash the item instead of directly using its raw content, because
|
|
#
|
|
# 1. The raw content might be really large, e.g. when its a contact
|
|
# with a picture, which bloats the status file.
|
|
#
|
|
# 2. The status file would contain really sensitive information.
|
|
return self.uid or self.hash
|
|
|
|
@property
|
|
def parsed(self):
|
|
'''Don't cache because the rv is mutable.'''
|
|
try:
|
|
return _Component.parse(self.raw)
|
|
except Exception:
|
|
return None
|
|
|
|
|
|
def normalize_item(item, ignore_props=IGNORE_PROPS):
|
|
'''Create syntactically invalid mess that is equal for similar items.'''
|
|
if not isinstance(item, Item):
|
|
item = Item(item)
|
|
|
|
item = _strip_timezones(item)
|
|
|
|
x = _Component('TEMP', item.raw.splitlines(), [])
|
|
for prop in IGNORE_PROPS:
|
|
del x[prop]
|
|
|
|
x.props.sort()
|
|
return u'\r\n'.join(filter(bool, (line.strip() for line in x.props)))
|
|
|
|
|
|
def _strip_timezones(item):
|
|
parsed = item.parsed
|
|
if not parsed or parsed.name != 'VCALENDAR':
|
|
return item
|
|
|
|
parsed.subcomponents = [c for c in parsed.subcomponents
|
|
if c.name != 'VTIMEZONE']
|
|
|
|
return Item('\r\n'.join(parsed.dump_lines()))
|
|
|
|
|
|
def hash_item(text):
|
|
return hashlib.sha256(normalize_item(text).encode('utf-8')).hexdigest()
|
|
|
|
|
|
def split_collection(text):
|
|
assert isinstance(text, str)
|
|
inline = []
|
|
items = {} # uid => item
|
|
ungrouped_items = []
|
|
|
|
for main in _Component.parse(text, multiple=True):
|
|
_split_collection_impl(main, main, inline, items, ungrouped_items)
|
|
|
|
for item in chain(items.values(), ungrouped_items):
|
|
item.subcomponents.extend(inline)
|
|
yield u'\r\n'.join(item.dump_lines())
|
|
|
|
|
|
def _split_collection_impl(item, main, inline, items, ungrouped_items):
|
|
if item.name == u'VTIMEZONE':
|
|
inline.append(item)
|
|
elif item.name == u'VCARD':
|
|
ungrouped_items.append(item)
|
|
elif item.name in (u'VTODO', u'VEVENT', u'VJOURNAL'):
|
|
uid = item.get(u'UID', u'')
|
|
wrapper = _Component(main.name, main.props[:], [])
|
|
|
|
if uid.strip():
|
|
wrapper = items.setdefault(uid, wrapper)
|
|
else:
|
|
ungrouped_items.append(wrapper)
|
|
|
|
wrapper.subcomponents.append(item)
|
|
elif item.name in (u'VCALENDAR', u'VADDRESSBOOK'):
|
|
if item.name == 'VCALENDAR':
|
|
del item['METHOD']
|
|
for subitem in item.subcomponents:
|
|
_split_collection_impl(subitem, item, inline, items,
|
|
ungrouped_items)
|
|
else:
|
|
raise ValueError('Unknown component: {}'
|
|
.format(item.name))
|
|
|
|
|
|
_default_join_wrappers = {
|
|
u'VCALENDAR': u'VCALENDAR',
|
|
u'VEVENT': u'VCALENDAR',
|
|
u'VTODO': u'VCALENDAR',
|
|
u'VCARD': u'VADDRESSBOOK'
|
|
}
|
|
|
|
|
|
def join_collection(items, wrappers=_default_join_wrappers):
|
|
'''
|
|
:param wrappers: {
|
|
item_type: wrapper_type
|
|
}
|
|
'''
|
|
|
|
items1, items2 = tee((_Component.parse(x)
|
|
for x in items), 2)
|
|
item_type, wrapper_type = _get_item_type(items1, wrappers)
|
|
wrapper_props = []
|
|
|
|
def _get_item_components(x):
|
|
if x.name == wrapper_type:
|
|
wrapper_props.extend(x.props)
|
|
return x.subcomponents
|
|
else:
|
|
return [x]
|
|
|
|
components = chain(*(_get_item_components(x) for x in items2))
|
|
lines = chain(*uniq(tuple(x.dump_lines()) for x in components))
|
|
|
|
if wrapper_type is not None:
|
|
lines = chain(*(
|
|
[u'BEGIN:{}'.format(wrapper_type)],
|
|
# XXX: wrapper_props is a list of lines (with line-wrapping), so
|
|
# filtering out duplicate lines will almost certainly break
|
|
# multiline-values. Since the only props we usually need to
|
|
# support are PRODID and VERSION, I don't care.
|
|
uniq(wrapper_props),
|
|
lines,
|
|
[u'END:{}'.format(wrapper_type)]
|
|
))
|
|
return u''.join(line + u'\r\n' for line in lines)
|
|
|
|
|
|
def _get_item_type(components, wrappers):
|
|
i = 0
|
|
for component in components:
|
|
i += 1
|
|
try:
|
|
item_type = component.name
|
|
wrapper_type = wrappers[item_type]
|
|
except KeyError:
|
|
pass
|
|
else:
|
|
return item_type, wrapper_type
|
|
|
|
if not i:
|
|
return None, None
|
|
else:
|
|
raise ValueError('Not sure how to join components.')
|
|
|
|
|
|
class _Component(object):
|
|
'''
|
|
Raw outline of the components.
|
|
|
|
Vdirsyncer's operations on iCalendar and VCard objects are limited to
|
|
retrieving the UID and splitting larger files into items. Consequently this
|
|
parser is very lazy, with the downside that manipulation of item properties
|
|
are extremely costly.
|
|
|
|
Other features:
|
|
|
|
- Preserve the original property order and wrapping.
|
|
- Don't choke on irrelevant details like invalid datetime formats.
|
|
|
|
Original version from https://github.com/collective/icalendar/, but apart
|
|
from the similar API, very few parts have been reused.
|
|
'''
|
|
|
|
def __init__(self, name, lines, subcomponents):
|
|
'''
|
|
:param name: The component name.
|
|
:param lines: The component's own properties, as list of lines
|
|
(strings).
|
|
:param subcomponents: List of components.
|
|
'''
|
|
self.name = name
|
|
self.props = lines
|
|
self.subcomponents = subcomponents
|
|
|
|
@classmethod
|
|
def parse(cls, lines, multiple=False):
|
|
if isinstance(lines, bytes):
|
|
lines = lines.decode('utf-8')
|
|
if isinstance(lines, str):
|
|
lines = lines.splitlines()
|
|
|
|
stack = []
|
|
rv = []
|
|
try:
|
|
for _i, line in enumerate(lines):
|
|
if line.startswith(u'BEGIN:'):
|
|
c_name = line[len(u'BEGIN:'):].strip().upper()
|
|
stack.append(cls(c_name, [], []))
|
|
elif line.startswith(u'END:'):
|
|
component = stack.pop()
|
|
if stack:
|
|
stack[-1].subcomponents.append(component)
|
|
else:
|
|
rv.append(component)
|
|
else:
|
|
if line.strip():
|
|
stack[-1].props.append(line)
|
|
except IndexError:
|
|
raise ValueError('Parsing error at line {}'.format(_i + 1))
|
|
|
|
if multiple:
|
|
return rv
|
|
elif len(rv) != 1:
|
|
raise ValueError('Found {} components, expected one.'
|
|
.format(len(rv)))
|
|
else:
|
|
return rv[0]
|
|
|
|
def dump_lines(self):
|
|
yield u'BEGIN:{}'.format(self.name)
|
|
for line in self.props:
|
|
yield line
|
|
for c in self.subcomponents:
|
|
for line in c.dump_lines():
|
|
yield line
|
|
yield u'END:{}'.format(self.name)
|
|
|
|
def __delitem__(self, key):
|
|
prefix = (u'{}:'.format(key), u'{};'.format(key))
|
|
new_lines = []
|
|
lineiter = iter(self.props)
|
|
while True:
|
|
for line in lineiter:
|
|
if line.startswith(prefix):
|
|
break
|
|
else:
|
|
new_lines.append(line)
|
|
else:
|
|
break
|
|
|
|
for line in lineiter:
|
|
if not line.startswith((u' ', u'\t')):
|
|
new_lines.append(line)
|
|
break
|
|
|
|
self.props = new_lines
|
|
|
|
def __setitem__(self, key, val):
|
|
assert isinstance(val, str)
|
|
assert u'\n' not in val
|
|
del self[key]
|
|
line = u'{}:{}'.format(key, val)
|
|
self.props.append(line)
|
|
|
|
def __contains__(self, obj):
|
|
if isinstance(obj, type(self)):
|
|
return obj not in self.subcomponents and \
|
|
not any(obj in x for x in self.subcomponents)
|
|
elif isinstance(obj, str):
|
|
return self.get(obj, None) is not None
|
|
else:
|
|
raise ValueError(obj)
|
|
|
|
def __getitem__(self, key):
|
|
prefix_without_params = '{}:'.format(key)
|
|
prefix_with_params = '{};'.format(key)
|
|
iterlines = iter(self.props)
|
|
for line in iterlines:
|
|
if line.startswith(prefix_without_params):
|
|
rv = line[len(prefix_without_params):]
|
|
break
|
|
elif line.startswith(prefix_with_params):
|
|
rv = line[len(prefix_with_params):].split(':', 1)[-1]
|
|
break
|
|
else:
|
|
raise KeyError()
|
|
|
|
for line in iterlines:
|
|
if line.startswith((u' ', u'\t')):
|
|
rv += line[1:]
|
|
else:
|
|
break
|
|
|
|
return rv
|
|
|
|
def get(self, key, default=None):
|
|
try:
|
|
return self[key]
|
|
except KeyError:
|
|
return default
|
|
|
|
def __eq__(self, other):
|
|
return (
|
|
isinstance(other, type(self)) and
|
|
self.name == other.name and
|
|
self.props == other.props and
|
|
self.subcomponents == other.subcomponents
|
|
)
|