diff --git a/rust/Cargo.toml b/rust/Cargo.toml index a03d9b2..cee8f2a 100644 --- a/rust/Cargo.toml +++ b/rust/Cargo.toml @@ -9,7 +9,8 @@ name = "vdirsyncer_rustext" crate-type = ["cdylib"] [dependencies] -vobject = "0.2.0" +vobject = "0.3.0" +ring = "0.12.1" [build-dependencies] cbindgen = "0.1" diff --git a/rust/src/lib.rs b/rust/src/lib.rs index ff67a60..a1a5209 100644 --- a/rust/src/lib.rs +++ b/rust/src/lib.rs @@ -1,10 +1,13 @@ extern crate vobject; +extern crate ring; use std::ffi::{CStr, CString}; use std::os::raw::c_char; use std::mem; use std::ptr; +use std::fmt::Write; + const EMPTY_STRING: *const c_char = b"\0" as *const u8 as *const c_char; #[repr(C)] @@ -100,3 +103,53 @@ pub unsafe extern "C" fn vdirsyncer_clone_component(c: *mut VdirsyncerComponent) pub unsafe extern "C" fn vdirsyncer_write_component(c: *mut VdirsyncerComponent) -> *const c_char { CString::new(vobject::write_component(&(*c).0)).unwrap().into_raw() } + +#[no_mangle] +pub unsafe extern "C" fn vdirsyncer_hash_component(c: *mut VdirsyncerComponent) -> *const c_char { + CString::new(safe_hash_component(&(*c).0)).unwrap().into_raw() +} + +fn safe_hash_component(c: &vobject::Component) -> String { + let mut new_c = c.clone(); + { + let mut stack = vec![&mut new_c]; + while let Some(component) = stack.pop() { + // PRODID is changed by radicale for some reason after upload + component.remove("PRODID"); + // Sometimes METHOD:PUBLISH is added by WebCAL providers, for us it doesn't make a difference + component.remove("METHOD"); + // X-RADICALE-NAME is used by radicale, because hrefs don't really exist in their filesystem backend + component.remove("X-RADICALE-NAME"); + // Apparently this is set by Horde? + // https://github.com/pimutils/vdirsyncer/issues/318 + component.remove("X-WR-CALNAME"); + // Those are from the VCARD specification and is supposed to change when the + // item does -- however, we can determine that ourselves + component.remove("REV"); + component.remove("LAST-MODIFIED"); + component.remove("CREATED"); + // Some iCalendar HTTP calendars generate the DTSTAMP at request time, so + // this property always changes when the rest of the item didn't. Some do + // the same with the UID. + // + // - Google's read-only calendar links + // - http://www.feiertage-oesterreich.at/ + component.remove("DTSTAMP"); + component.remove("UID"); + + if component.name == "VCALENDAR" { + component.subcomponents.retain(|ref c| c.name != "VTIMEZONE"); + } + + stack.extend(component.subcomponents.iter_mut()); + } + } + + // FIXME: Possible optimization: Stream component to hasher instead of allocating new string + let digest = ring::digest::digest(&ring::digest::SHA256, vobject::write_component(&new_c).as_bytes()); + let mut rv = String::new(); + for &byte in digest.as_ref() { + write!(&mut rv, "{:x}", byte).unwrap(); + } + rv +} diff --git a/tests/__init__.py b/tests/__init__.py index 0f94351..d1c7e23 100644 --- a/tests/__init__.py +++ b/tests/__init__.py @@ -7,7 +7,7 @@ import random import hypothesis.strategies as st -from vdirsyncer.vobject import Item, normalize_item +from vdirsyncer.vobject import Item import urllib3 import urllib3.exceptions @@ -20,7 +20,7 @@ def blow_up(*a, **kw): def assert_item_equals(a, b): - assert normalize_item(a) == normalize_item(b) + assert a.hash == b.hash VCARD_TEMPLATE = u'''BEGIN:VCARD diff --git a/tests/storage/__init__.py b/tests/storage/__init__.py index 9fef293..c55278b 100644 --- a/tests/storage/__init__.py +++ b/tests/storage/__init__.py @@ -15,7 +15,7 @@ from vdirsyncer.storage.base import normalize_meta_value from vdirsyncer.vobject import Item from .. import EVENT_TEMPLATE, TASK_TEMPLATE, VCARD_TEMPLATE, \ - assert_item_equals, format_item, normalize_item, \ + assert_item_equals, format_item, \ printable_characters_strategy @@ -348,4 +348,4 @@ class StorageTests(object): href, etag = s.upload(item) item2, etag2 = s.get(href) - assert normalize_item(item) == normalize_item(item2) + assert item.hash == item2.hash diff --git a/tests/storage/dav/__init__.py b/tests/storage/dav/__init__.py index 77109e8..b59e4d3 100644 --- a/tests/storage/dav/__init__.py +++ b/tests/storage/dav/__init__.py @@ -6,14 +6,8 @@ import os import pytest -import requests -import requests.exceptions - from tests import assert_item_equals -from vdirsyncer import exceptions -from vdirsyncer.vobject import Item - from .. import StorageTests, get_server_mixin @@ -24,14 +18,6 @@ ServerMixin = get_server_mixin(dav_server) class DAVStorageTests(ServerMixin, StorageTests): dav_server = dav_server - @pytest.mark.skipif(dav_server == 'radicale', - reason='Radicale is very tolerant.') - def test_dav_broken_item(self, s): - item = Item(u'HAHA:YES') - with pytest.raises((exceptions.Error, requests.exceptions.HTTPError)): - s.upload(item) - assert not list(s.list()) - def test_dav_empty_get_multi_performance(self, s, monkeypatch): def breakdown(*a, **kw): raise AssertionError('Expected not to be called.') diff --git a/tests/storage/test_http.py b/tests/storage/test_http.py index da14d5d..d6e6ff9 100644 --- a/tests/storage/test_http.py +++ b/tests/storage/test_http.py @@ -4,10 +4,9 @@ import pytest from requests import Response -from tests import normalize_item - from vdirsyncer.exceptions import UserError from vdirsyncer.storage.http import HttpStorage, prepare_auth +from vdirsyncer.vobject import Item def test_list(monkeypatch): @@ -56,9 +55,9 @@ def test_list(monkeypatch): item, etag2 = s.get(href) assert item.uid is not None assert etag2 == etag - found_items[normalize_item(item)] = href + found_items[item.hash] = href - expected = set(normalize_item(u'BEGIN:VCALENDAR\n' + x + '\nEND:VCALENDAR') + expected = set(Item(u'BEGIN:VCALENDAR\n' + x + '\nEND:VCALENDAR').hash for x in items) assert set(found_items) == expected @@ -67,7 +66,7 @@ def test_list(monkeypatch): item, etag2 = s.get(href) assert item.uid is not None assert etag2 == etag - assert found_items[normalize_item(item)] == href + assert found_items[item.hash] == href def test_readonly_param(): diff --git a/tests/unit/utils/test_vobject.py b/tests/unit/utils/test_vobject.py index 100756a..6d86921 100644 --- a/tests/unit/utils/test_vobject.py +++ b/tests/unit/utils/test_vobject.py @@ -9,7 +9,7 @@ from hypothesis.stateful import Bundle, RuleBasedStateMachine, rule import pytest from tests import BARE_EVENT_TEMPLATE, EVENT_TEMPLATE, \ - EVENT_WITH_TIMEZONE_TEMPLATE, VCARD_TEMPLATE, normalize_item, \ + EVENT_WITH_TIMEZONE_TEMPLATE, VCARD_TEMPLATE, \ uid_strategy import vdirsyncer.vobject as vobject @@ -31,8 +31,8 @@ _simple_joined = u'\r\n'.join( def test_split_collection_simple(benchmark): given = benchmark(lambda: list(vobject.split_collection(_simple_joined))) - assert [normalize_item(item) for item in given] == \ - [normalize_item(item) for item in _simple_split] + assert [vobject.Item(item).hash for item in given] == \ + [vobject.Item(item).hash for item in _simple_split] assert [x.splitlines() for x in given] == \ [x.splitlines() for x in _simple_split] @@ -47,8 +47,8 @@ def test_split_collection_multiple_wrappers(benchmark): ) given = benchmark(lambda: list(vobject.split_collection(joined))) - assert [normalize_item(item) for item in given] == \ - [normalize_item(item) for item in _simple_split] + assert [vobject.Item(item).hash for item in given] == \ + [vobject.Item(item).hash for item in _simple_split] assert [x.splitlines() for x in given] == \ [x.splitlines() for x in _simple_split] @@ -56,7 +56,7 @@ def test_split_collection_multiple_wrappers(benchmark): def test_join_collection_simple(benchmark): given = benchmark(lambda: vobject.join_collection(_simple_split)) - assert normalize_item(given) == normalize_item(_simple_joined) + assert vobject.Item(given).hash == vobject.Item(_simple_joined).hash assert given.splitlines() == _simple_joined.splitlines() @@ -123,12 +123,12 @@ def test_split_collection_timezones(): [timezone, u'END:VCALENDAR'] ) - given = set(normalize_item(item) + given = set(vobject.Item(item).hash for item in vobject.split_collection(full)) expected = set( - normalize_item(u'\r\n'.join(( + vobject.Item(u'\r\n'.join(( u'BEGIN:VCALENDAR', item, timezone, u'END:VCALENDAR' - ))) + ))).hash for item in items ) @@ -146,11 +146,11 @@ def test_split_contacts(): with_wrapper.splitlines() -def test_hash_item(): +def test_hash_item2(): a = EVENT_TEMPLATE.format(r=1, uid=1) b = u'\n'.join(line for line in a.splitlines() if u'PRODID' not in line) - assert vobject.hash_item(a) == vobject.hash_item(b) + assert vobject.Item(a).hash == vobject.Item(b).hash def test_multiline_uid(benchmark): @@ -351,3 +351,47 @@ def test_component_contains(): with pytest.raises(ValueError): 42 in item + + +def test_hash_item(): + item1 = vobject.Item( + 'BEGIN:FOO\r\n' + 'X-RADICALE-NAME:YES\r\n' + 'END:FOO\r\n' + ) + + item2 = vobject.Item( + 'BEGIN:FOO\r\n' + 'X-RADICALE-NAME:NO\r\n' + 'END:FOO\r\n' + ) + + assert item1.hash == item2.hash + + item2 = vobject.Item( + 'BEGIN:FOO\r\n' + 'X-RADICALE-NAME:NO\r\n' + 'OTHER-PROP:YAY\r\n' + 'END:FOO\r\n' + ) + + assert item1.hash != item2.hash + + +def test_hash_item_timezones(): + item1 = vobject.Item( + 'BEGIN:VCALENDAR\r\n' + 'HELLO:HAHA\r\n' + 'BEGIN:VTIMEZONE\r\n' + 'PROP:YES\r\n' + 'END:VTIMEZONE\r\n' + 'END:VCALENDAR\r\n' + ) + + item2 = vobject.Item( + 'BEGIN:VCALENDAR\r\n' + 'HELLO:HAHA\r\n' + 'END:VCALENDAR\r\n' + ) + + assert item1.hash == item2.hash diff --git a/vdirsyncer/native.py b/vdirsyncer/native.py index 6eb54ea..cf4f5c1 100644 --- a/vdirsyncer/native.py +++ b/vdirsyncer/native.py @@ -39,3 +39,7 @@ def _component_rv(c): def clone_component(c): return _component_rv(lib.vdirsyncer_clone_component(c)) + + +def hash_component(c): + return _string_rv(lib.vdirsyncer_hash_component(c)) diff --git a/vdirsyncer/vobject.py b/vdirsyncer/vobject.py index 397bea8..3e8c402 100644 --- a/vdirsyncer/vobject.py +++ b/vdirsyncer/vobject.py @@ -1,40 +1,11 @@ # -*- coding: utf-8 -*- -import hashlib from itertools import chain, tee from .utils import cached_property, uniq from . import exceptions, native -IGNORE_PROPS = ( - # PRODID is changed by radicale for some reason after upload - 'PRODID', - # Sometimes METHOD:PUBLISH is added by WebCAL providers, for us it doesn't - # make a difference - 'METHOD', - # X-RADICALE-NAME is used by radicale, because hrefs don't really exist in - # their filesystem backend - 'X-RADICALE-NAME', - # Apparently this is set by Horde? - # https://github.com/pimutils/vdirsyncer/issues/318 - 'X-WR-CALNAME', - # Those are from the VCARD specification and is supposed to change when the - # item does -- however, we can determine that ourselves - 'REV', - 'LAST-MODIFIED', - 'CREATED', - # Some iCalendar HTTP calendars generate the DTSTAMP at request time, so - # this property always changes when the rest of the item didn't. Some do - # the same with the UID. - # - # - Google's read-only calendar links - # - http://www.feiertage-oesterreich.at/ - 'DTSTAMP', - 'UID', -) - - class Item(object): '''Immutable wrapper class for VCALENDAR (VEVENT, VTODO) and @@ -76,8 +47,11 @@ class Item(object): @cached_property def hash(self): - '''Hash of self.raw, used for etags.''' - return hash_item(self.raw) + '''Used for etags.''' + if not self.is_valid: + raise ValueError('Item malformed.') + + return native.hash_component(self._component) @cached_property def ident(self): @@ -107,36 +81,6 @@ class Item(object): return bool(self._component) -def normalize_item(item, ignore_props=IGNORE_PROPS): - '''Create syntactically invalid mess that is equal for similar items.''' - if not isinstance(item, Item): - item = Item(item) - - item = _strip_timezones(item) - - x = _Component('TEMP', item.raw.splitlines(), []) - for prop in IGNORE_PROPS: - del x[prop] - - x.props.sort() - return u'\r\n'.join(filter(bool, (line.strip() for line in x.props))) - - -def _strip_timezones(item): - parsed = item.parsed - if not parsed or parsed.name != 'VCALENDAR': - return item - - parsed.subcomponents = [c for c in parsed.subcomponents - if c.name != 'VTIMEZONE'] - - return Item('\r\n'.join(parsed.dump_lines())) - - -def hash_item(text): - return hashlib.sha256(normalize_item(text).encode('utf-8')).hexdigest() - - def split_collection(text): assert isinstance(text, str) inline = []