mirror of
https://github.com/samsonjs/vdirsyncer.git
synced 2026-04-27 14:57:41 +00:00
Move item hashing into rust
This commit is contained in:
parent
c17fa308fb
commit
bdbfc360ff
9 changed files with 127 additions and 96 deletions
|
|
@ -9,7 +9,8 @@ name = "vdirsyncer_rustext"
|
||||||
crate-type = ["cdylib"]
|
crate-type = ["cdylib"]
|
||||||
|
|
||||||
[dependencies]
|
[dependencies]
|
||||||
vobject = "0.2.0"
|
vobject = "0.3.0"
|
||||||
|
ring = "0.12.1"
|
||||||
|
|
||||||
[build-dependencies]
|
[build-dependencies]
|
||||||
cbindgen = "0.1"
|
cbindgen = "0.1"
|
||||||
|
|
|
||||||
|
|
@ -1,10 +1,13 @@
|
||||||
extern crate vobject;
|
extern crate vobject;
|
||||||
|
extern crate ring;
|
||||||
|
|
||||||
use std::ffi::{CStr, CString};
|
use std::ffi::{CStr, CString};
|
||||||
use std::os::raw::c_char;
|
use std::os::raw::c_char;
|
||||||
use std::mem;
|
use std::mem;
|
||||||
use std::ptr;
|
use std::ptr;
|
||||||
|
|
||||||
|
use std::fmt::Write;
|
||||||
|
|
||||||
const EMPTY_STRING: *const c_char = b"\0" as *const u8 as *const c_char;
|
const EMPTY_STRING: *const c_char = b"\0" as *const u8 as *const c_char;
|
||||||
|
|
||||||
#[repr(C)]
|
#[repr(C)]
|
||||||
|
|
@ -100,3 +103,53 @@ pub unsafe extern "C" fn vdirsyncer_clone_component(c: *mut VdirsyncerComponent)
|
||||||
pub unsafe extern "C" fn vdirsyncer_write_component(c: *mut VdirsyncerComponent) -> *const c_char {
|
pub unsafe extern "C" fn vdirsyncer_write_component(c: *mut VdirsyncerComponent) -> *const c_char {
|
||||||
CString::new(vobject::write_component(&(*c).0)).unwrap().into_raw()
|
CString::new(vobject::write_component(&(*c).0)).unwrap().into_raw()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[no_mangle]
|
||||||
|
pub unsafe extern "C" fn vdirsyncer_hash_component(c: *mut VdirsyncerComponent) -> *const c_char {
|
||||||
|
CString::new(safe_hash_component(&(*c).0)).unwrap().into_raw()
|
||||||
|
}
|
||||||
|
|
||||||
|
fn safe_hash_component(c: &vobject::Component) -> String {
|
||||||
|
let mut new_c = c.clone();
|
||||||
|
{
|
||||||
|
let mut stack = vec![&mut new_c];
|
||||||
|
while let Some(component) = stack.pop() {
|
||||||
|
// PRODID is changed by radicale for some reason after upload
|
||||||
|
component.remove("PRODID");
|
||||||
|
// Sometimes METHOD:PUBLISH is added by WebCAL providers, for us it doesn't make a difference
|
||||||
|
component.remove("METHOD");
|
||||||
|
// X-RADICALE-NAME is used by radicale, because hrefs don't really exist in their filesystem backend
|
||||||
|
component.remove("X-RADICALE-NAME");
|
||||||
|
// Apparently this is set by Horde?
|
||||||
|
// https://github.com/pimutils/vdirsyncer/issues/318
|
||||||
|
component.remove("X-WR-CALNAME");
|
||||||
|
// Those are from the VCARD specification and is supposed to change when the
|
||||||
|
// item does -- however, we can determine that ourselves
|
||||||
|
component.remove("REV");
|
||||||
|
component.remove("LAST-MODIFIED");
|
||||||
|
component.remove("CREATED");
|
||||||
|
// Some iCalendar HTTP calendars generate the DTSTAMP at request time, so
|
||||||
|
// this property always changes when the rest of the item didn't. Some do
|
||||||
|
// the same with the UID.
|
||||||
|
//
|
||||||
|
// - Google's read-only calendar links
|
||||||
|
// - http://www.feiertage-oesterreich.at/
|
||||||
|
component.remove("DTSTAMP");
|
||||||
|
component.remove("UID");
|
||||||
|
|
||||||
|
if component.name == "VCALENDAR" {
|
||||||
|
component.subcomponents.retain(|ref c| c.name != "VTIMEZONE");
|
||||||
|
}
|
||||||
|
|
||||||
|
stack.extend(component.subcomponents.iter_mut());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// FIXME: Possible optimization: Stream component to hasher instead of allocating new string
|
||||||
|
let digest = ring::digest::digest(&ring::digest::SHA256, vobject::write_component(&new_c).as_bytes());
|
||||||
|
let mut rv = String::new();
|
||||||
|
for &byte in digest.as_ref() {
|
||||||
|
write!(&mut rv, "{:x}", byte).unwrap();
|
||||||
|
}
|
||||||
|
rv
|
||||||
|
}
|
||||||
|
|
|
||||||
|
|
@ -7,7 +7,7 @@ import random
|
||||||
|
|
||||||
import hypothesis.strategies as st
|
import hypothesis.strategies as st
|
||||||
|
|
||||||
from vdirsyncer.vobject import Item, normalize_item
|
from vdirsyncer.vobject import Item
|
||||||
|
|
||||||
import urllib3
|
import urllib3
|
||||||
import urllib3.exceptions
|
import urllib3.exceptions
|
||||||
|
|
@ -20,7 +20,7 @@ def blow_up(*a, **kw):
|
||||||
|
|
||||||
|
|
||||||
def assert_item_equals(a, b):
|
def assert_item_equals(a, b):
|
||||||
assert normalize_item(a) == normalize_item(b)
|
assert a.hash == b.hash
|
||||||
|
|
||||||
|
|
||||||
VCARD_TEMPLATE = u'''BEGIN:VCARD
|
VCARD_TEMPLATE = u'''BEGIN:VCARD
|
||||||
|
|
|
||||||
|
|
@ -15,7 +15,7 @@ from vdirsyncer.storage.base import normalize_meta_value
|
||||||
from vdirsyncer.vobject import Item
|
from vdirsyncer.vobject import Item
|
||||||
|
|
||||||
from .. import EVENT_TEMPLATE, TASK_TEMPLATE, VCARD_TEMPLATE, \
|
from .. import EVENT_TEMPLATE, TASK_TEMPLATE, VCARD_TEMPLATE, \
|
||||||
assert_item_equals, format_item, normalize_item, \
|
assert_item_equals, format_item, \
|
||||||
printable_characters_strategy
|
printable_characters_strategy
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -348,4 +348,4 @@ class StorageTests(object):
|
||||||
href, etag = s.upload(item)
|
href, etag = s.upload(item)
|
||||||
|
|
||||||
item2, etag2 = s.get(href)
|
item2, etag2 = s.get(href)
|
||||||
assert normalize_item(item) == normalize_item(item2)
|
assert item.hash == item2.hash
|
||||||
|
|
|
||||||
|
|
@ -6,14 +6,8 @@ import os
|
||||||
|
|
||||||
import pytest
|
import pytest
|
||||||
|
|
||||||
import requests
|
|
||||||
import requests.exceptions
|
|
||||||
|
|
||||||
from tests import assert_item_equals
|
from tests import assert_item_equals
|
||||||
|
|
||||||
from vdirsyncer import exceptions
|
|
||||||
from vdirsyncer.vobject import Item
|
|
||||||
|
|
||||||
from .. import StorageTests, get_server_mixin
|
from .. import StorageTests, get_server_mixin
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -24,14 +18,6 @@ ServerMixin = get_server_mixin(dav_server)
|
||||||
class DAVStorageTests(ServerMixin, StorageTests):
|
class DAVStorageTests(ServerMixin, StorageTests):
|
||||||
dav_server = dav_server
|
dav_server = dav_server
|
||||||
|
|
||||||
@pytest.mark.skipif(dav_server == 'radicale',
|
|
||||||
reason='Radicale is very tolerant.')
|
|
||||||
def test_dav_broken_item(self, s):
|
|
||||||
item = Item(u'HAHA:YES')
|
|
||||||
with pytest.raises((exceptions.Error, requests.exceptions.HTTPError)):
|
|
||||||
s.upload(item)
|
|
||||||
assert not list(s.list())
|
|
||||||
|
|
||||||
def test_dav_empty_get_multi_performance(self, s, monkeypatch):
|
def test_dav_empty_get_multi_performance(self, s, monkeypatch):
|
||||||
def breakdown(*a, **kw):
|
def breakdown(*a, **kw):
|
||||||
raise AssertionError('Expected not to be called.')
|
raise AssertionError('Expected not to be called.')
|
||||||
|
|
|
||||||
|
|
@ -4,10 +4,9 @@ import pytest
|
||||||
|
|
||||||
from requests import Response
|
from requests import Response
|
||||||
|
|
||||||
from tests import normalize_item
|
|
||||||
|
|
||||||
from vdirsyncer.exceptions import UserError
|
from vdirsyncer.exceptions import UserError
|
||||||
from vdirsyncer.storage.http import HttpStorage, prepare_auth
|
from vdirsyncer.storage.http import HttpStorage, prepare_auth
|
||||||
|
from vdirsyncer.vobject import Item
|
||||||
|
|
||||||
|
|
||||||
def test_list(monkeypatch):
|
def test_list(monkeypatch):
|
||||||
|
|
@ -56,9 +55,9 @@ def test_list(monkeypatch):
|
||||||
item, etag2 = s.get(href)
|
item, etag2 = s.get(href)
|
||||||
assert item.uid is not None
|
assert item.uid is not None
|
||||||
assert etag2 == etag
|
assert etag2 == etag
|
||||||
found_items[normalize_item(item)] = href
|
found_items[item.hash] = href
|
||||||
|
|
||||||
expected = set(normalize_item(u'BEGIN:VCALENDAR\n' + x + '\nEND:VCALENDAR')
|
expected = set(Item(u'BEGIN:VCALENDAR\n' + x + '\nEND:VCALENDAR').hash
|
||||||
for x in items)
|
for x in items)
|
||||||
|
|
||||||
assert set(found_items) == expected
|
assert set(found_items) == expected
|
||||||
|
|
@ -67,7 +66,7 @@ def test_list(monkeypatch):
|
||||||
item, etag2 = s.get(href)
|
item, etag2 = s.get(href)
|
||||||
assert item.uid is not None
|
assert item.uid is not None
|
||||||
assert etag2 == etag
|
assert etag2 == etag
|
||||||
assert found_items[normalize_item(item)] == href
|
assert found_items[item.hash] == href
|
||||||
|
|
||||||
|
|
||||||
def test_readonly_param():
|
def test_readonly_param():
|
||||||
|
|
|
||||||
|
|
@ -9,7 +9,7 @@ from hypothesis.stateful import Bundle, RuleBasedStateMachine, rule
|
||||||
import pytest
|
import pytest
|
||||||
|
|
||||||
from tests import BARE_EVENT_TEMPLATE, EVENT_TEMPLATE, \
|
from tests import BARE_EVENT_TEMPLATE, EVENT_TEMPLATE, \
|
||||||
EVENT_WITH_TIMEZONE_TEMPLATE, VCARD_TEMPLATE, normalize_item, \
|
EVENT_WITH_TIMEZONE_TEMPLATE, VCARD_TEMPLATE, \
|
||||||
uid_strategy
|
uid_strategy
|
||||||
|
|
||||||
import vdirsyncer.vobject as vobject
|
import vdirsyncer.vobject as vobject
|
||||||
|
|
@ -31,8 +31,8 @@ _simple_joined = u'\r\n'.join(
|
||||||
def test_split_collection_simple(benchmark):
|
def test_split_collection_simple(benchmark):
|
||||||
given = benchmark(lambda: list(vobject.split_collection(_simple_joined)))
|
given = benchmark(lambda: list(vobject.split_collection(_simple_joined)))
|
||||||
|
|
||||||
assert [normalize_item(item) for item in given] == \
|
assert [vobject.Item(item).hash for item in given] == \
|
||||||
[normalize_item(item) for item in _simple_split]
|
[vobject.Item(item).hash for item in _simple_split]
|
||||||
|
|
||||||
assert [x.splitlines() for x in given] == \
|
assert [x.splitlines() for x in given] == \
|
||||||
[x.splitlines() for x in _simple_split]
|
[x.splitlines() for x in _simple_split]
|
||||||
|
|
@ -47,8 +47,8 @@ def test_split_collection_multiple_wrappers(benchmark):
|
||||||
)
|
)
|
||||||
given = benchmark(lambda: list(vobject.split_collection(joined)))
|
given = benchmark(lambda: list(vobject.split_collection(joined)))
|
||||||
|
|
||||||
assert [normalize_item(item) for item in given] == \
|
assert [vobject.Item(item).hash for item in given] == \
|
||||||
[normalize_item(item) for item in _simple_split]
|
[vobject.Item(item).hash for item in _simple_split]
|
||||||
|
|
||||||
assert [x.splitlines() for x in given] == \
|
assert [x.splitlines() for x in given] == \
|
||||||
[x.splitlines() for x in _simple_split]
|
[x.splitlines() for x in _simple_split]
|
||||||
|
|
@ -56,7 +56,7 @@ def test_split_collection_multiple_wrappers(benchmark):
|
||||||
|
|
||||||
def test_join_collection_simple(benchmark):
|
def test_join_collection_simple(benchmark):
|
||||||
given = benchmark(lambda: vobject.join_collection(_simple_split))
|
given = benchmark(lambda: vobject.join_collection(_simple_split))
|
||||||
assert normalize_item(given) == normalize_item(_simple_joined)
|
assert vobject.Item(given).hash == vobject.Item(_simple_joined).hash
|
||||||
assert given.splitlines() == _simple_joined.splitlines()
|
assert given.splitlines() == _simple_joined.splitlines()
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -123,12 +123,12 @@ def test_split_collection_timezones():
|
||||||
[timezone, u'END:VCALENDAR']
|
[timezone, u'END:VCALENDAR']
|
||||||
)
|
)
|
||||||
|
|
||||||
given = set(normalize_item(item)
|
given = set(vobject.Item(item).hash
|
||||||
for item in vobject.split_collection(full))
|
for item in vobject.split_collection(full))
|
||||||
expected = set(
|
expected = set(
|
||||||
normalize_item(u'\r\n'.join((
|
vobject.Item(u'\r\n'.join((
|
||||||
u'BEGIN:VCALENDAR', item, timezone, u'END:VCALENDAR'
|
u'BEGIN:VCALENDAR', item, timezone, u'END:VCALENDAR'
|
||||||
)))
|
))).hash
|
||||||
for item in items
|
for item in items
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
@ -146,11 +146,11 @@ def test_split_contacts():
|
||||||
with_wrapper.splitlines()
|
with_wrapper.splitlines()
|
||||||
|
|
||||||
|
|
||||||
def test_hash_item():
|
def test_hash_item2():
|
||||||
a = EVENT_TEMPLATE.format(r=1, uid=1)
|
a = EVENT_TEMPLATE.format(r=1, uid=1)
|
||||||
b = u'\n'.join(line for line in a.splitlines()
|
b = u'\n'.join(line for line in a.splitlines()
|
||||||
if u'PRODID' not in line)
|
if u'PRODID' not in line)
|
||||||
assert vobject.hash_item(a) == vobject.hash_item(b)
|
assert vobject.Item(a).hash == vobject.Item(b).hash
|
||||||
|
|
||||||
|
|
||||||
def test_multiline_uid(benchmark):
|
def test_multiline_uid(benchmark):
|
||||||
|
|
@ -351,3 +351,47 @@ def test_component_contains():
|
||||||
|
|
||||||
with pytest.raises(ValueError):
|
with pytest.raises(ValueError):
|
||||||
42 in item
|
42 in item
|
||||||
|
|
||||||
|
|
||||||
|
def test_hash_item():
    '''The value of X-RADICALE-NAME must not affect the hash, but an
    additional, unrelated property must.'''
    base = vobject.Item(''.join([
        'BEGIN:FOO\r\n',
        'X-RADICALE-NAME:YES\r\n',
        'END:FOO\r\n',
    ]))

    # Same item, only the X-RADICALE-NAME value differs.
    renamed = vobject.Item(''.join([
        'BEGIN:FOO\r\n',
        'X-RADICALE-NAME:NO\r\n',
        'END:FOO\r\n',
    ]))

    assert base.hash == renamed.hash

    # Adding another property has to change the hash.
    extended = vobject.Item(''.join([
        'BEGIN:FOO\r\n',
        'X-RADICALE-NAME:NO\r\n',
        'OTHER-PROP:YAY\r\n',
        'END:FOO\r\n',
    ]))

    assert base.hash != extended.hash
|
||||||
|
|
||||||
|
|
||||||
|
def test_hash_item_timezones():
    '''A VCALENDAR hashes the same with and without a VTIMEZONE
    subcomponent.'''
    with_timezone = vobject.Item(''.join([
        'BEGIN:VCALENDAR\r\n',
        'HELLO:HAHA\r\n',
        'BEGIN:VTIMEZONE\r\n',
        'PROP:YES\r\n',
        'END:VTIMEZONE\r\n',
        'END:VCALENDAR\r\n',
    ]))

    without_timezone = vobject.Item(''.join([
        'BEGIN:VCALENDAR\r\n',
        'HELLO:HAHA\r\n',
        'END:VCALENDAR\r\n',
    ]))

    assert with_timezone.hash == without_timezone.hash
|
||||||
|
|
|
||||||
|
|
@ -39,3 +39,7 @@ def _component_rv(c):
|
||||||
|
|
||||||
def clone_component(c):
|
def clone_component(c):
|
||||||
return _component_rv(lib.vdirsyncer_clone_component(c))
|
return _component_rv(lib.vdirsyncer_clone_component(c))
|
||||||
|
|
||||||
|
|
||||||
|
def hash_component(c):
    '''Hash the native component ``c`` via the Rust extension.

    Calls ``lib.vdirsyncer_hash_component`` and passes its result through
    ``_string_rv`` (presumably converting the returned C string into a Python
    string -- confirm against its definition).
    '''
    raw_digest = lib.vdirsyncer_hash_component(c)
    return _string_rv(raw_digest)
|
||||||
|
|
|
||||||
|
|
@ -1,40 +1,11 @@
|
||||||
# -*- coding: utf-8 -*-
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
import hashlib
|
|
||||||
from itertools import chain, tee
|
from itertools import chain, tee
|
||||||
|
|
||||||
from .utils import cached_property, uniq
|
from .utils import cached_property, uniq
|
||||||
from . import exceptions, native
|
from . import exceptions, native
|
||||||
|
|
||||||
|
|
||||||
IGNORE_PROPS = (
|
|
||||||
# PRODID is changed by radicale for some reason after upload
|
|
||||||
'PRODID',
|
|
||||||
# Sometimes METHOD:PUBLISH is added by WebCAL providers, for us it doesn't
|
|
||||||
# make a difference
|
|
||||||
'METHOD',
|
|
||||||
# X-RADICALE-NAME is used by radicale, because hrefs don't really exist in
|
|
||||||
# their filesystem backend
|
|
||||||
'X-RADICALE-NAME',
|
|
||||||
# Apparently this is set by Horde?
|
|
||||||
# https://github.com/pimutils/vdirsyncer/issues/318
|
|
||||||
'X-WR-CALNAME',
|
|
||||||
# Those are from the VCARD specification and is supposed to change when the
|
|
||||||
# item does -- however, we can determine that ourselves
|
|
||||||
'REV',
|
|
||||||
'LAST-MODIFIED',
|
|
||||||
'CREATED',
|
|
||||||
# Some iCalendar HTTP calendars generate the DTSTAMP at request time, so
|
|
||||||
# this property always changes when the rest of the item didn't. Some do
|
|
||||||
# the same with the UID.
|
|
||||||
#
|
|
||||||
# - Google's read-only calendar links
|
|
||||||
# - http://www.feiertage-oesterreich.at/
|
|
||||||
'DTSTAMP',
|
|
||||||
'UID',
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
class Item(object):
|
class Item(object):
|
||||||
|
|
||||||
'''Immutable wrapper class for VCALENDAR (VEVENT, VTODO) and
|
'''Immutable wrapper class for VCALENDAR (VEVENT, VTODO) and
|
||||||
|
|
@ -76,8 +47,11 @@ class Item(object):
|
||||||
|
|
||||||
@cached_property
|
@cached_property
|
||||||
def hash(self):
|
def hash(self):
|
||||||
'''Hash of self.raw, used for etags.'''
|
'''Used for etags.'''
|
||||||
return hash_item(self.raw)
|
if not self.is_valid:
|
||||||
|
raise ValueError('Item malformed.')
|
||||||
|
|
||||||
|
return native.hash_component(self._component)
|
||||||
|
|
||||||
@cached_property
|
@cached_property
|
||||||
def ident(self):
|
def ident(self):
|
||||||
|
|
@ -107,36 +81,6 @@ class Item(object):
|
||||||
return bool(self._component)
|
return bool(self._component)
|
||||||
|
|
||||||
|
|
||||||
def normalize_item(item, ignore_props=IGNORE_PROPS):
|
|
||||||
'''Create syntactically invalid mess that is equal for similar items.'''
|
|
||||||
if not isinstance(item, Item):
|
|
||||||
item = Item(item)
|
|
||||||
|
|
||||||
item = _strip_timezones(item)
|
|
||||||
|
|
||||||
x = _Component('TEMP', item.raw.splitlines(), [])
|
|
||||||
for prop in IGNORE_PROPS:
|
|
||||||
del x[prop]
|
|
||||||
|
|
||||||
x.props.sort()
|
|
||||||
return u'\r\n'.join(filter(bool, (line.strip() for line in x.props)))
|
|
||||||
|
|
||||||
|
|
||||||
def _strip_timezones(item):
|
|
||||||
parsed = item.parsed
|
|
||||||
if not parsed or parsed.name != 'VCALENDAR':
|
|
||||||
return item
|
|
||||||
|
|
||||||
parsed.subcomponents = [c for c in parsed.subcomponents
|
|
||||||
if c.name != 'VTIMEZONE']
|
|
||||||
|
|
||||||
return Item('\r\n'.join(parsed.dump_lines()))
|
|
||||||
|
|
||||||
|
|
||||||
def hash_item(text):
|
|
||||||
return hashlib.sha256(normalize_item(text).encode('utf-8')).hexdigest()
|
|
||||||
|
|
||||||
|
|
||||||
def split_collection(text):
|
def split_collection(text):
|
||||||
assert isinstance(text, str)
|
assert isinstance(text, str)
|
||||||
inline = []
|
inline = []
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue