mirror of
https://github.com/samsonjs/vdirsyncer.git
synced 2026-03-25 08:55:50 +00:00
Move item hashing into rust
This commit is contained in:
parent
c17fa308fb
commit
bdbfc360ff
9 changed files with 127 additions and 96 deletions
|
|
@ -9,7 +9,8 @@ name = "vdirsyncer_rustext"
|
|||
crate-type = ["cdylib"]
|
||||
|
||||
[dependencies]
|
||||
vobject = "0.2.0"
|
||||
vobject = "0.3.0"
|
||||
ring = "0.12.1"
|
||||
|
||||
[build-dependencies]
|
||||
cbindgen = "0.1"
|
||||
|
|
|
|||
|
|
@ -1,10 +1,13 @@
|
|||
extern crate vobject;
|
||||
extern crate ring;
|
||||
|
||||
use std::ffi::{CStr, CString};
|
||||
use std::os::raw::c_char;
|
||||
use std::mem;
|
||||
use std::ptr;
|
||||
|
||||
use std::fmt::Write;
|
||||
|
||||
const EMPTY_STRING: *const c_char = b"\0" as *const u8 as *const c_char;
|
||||
|
||||
#[repr(C)]
|
||||
|
|
@ -100,3 +103,53 @@ pub unsafe extern "C" fn vdirsyncer_clone_component(c: *mut VdirsyncerComponent)
|
|||
pub unsafe extern "C" fn vdirsyncer_write_component(c: *mut VdirsyncerComponent) -> *const c_char {
|
||||
CString::new(vobject::write_component(&(*c).0)).unwrap().into_raw()
|
||||
}
|
||||
|
||||
#[no_mangle]
|
||||
pub unsafe extern "C" fn vdirsyncer_hash_component(c: *mut VdirsyncerComponent) -> *const c_char {
|
||||
CString::new(safe_hash_component(&(*c).0)).unwrap().into_raw()
|
||||
}
|
||||
|
||||
fn safe_hash_component(c: &vobject::Component) -> String {
|
||||
let mut new_c = c.clone();
|
||||
{
|
||||
let mut stack = vec![&mut new_c];
|
||||
while let Some(component) = stack.pop() {
|
||||
// PRODID is changed by radicale for some reason after upload
|
||||
component.remove("PRODID");
|
||||
// Sometimes METHOD:PUBLISH is added by WebCAL providers, for us it doesn't make a difference
|
||||
component.remove("METHOD");
|
||||
// X-RADICALE-NAME is used by radicale, because hrefs don't really exist in their filesystem backend
|
||||
component.remove("X-RADICALE-NAME");
|
||||
// Apparently this is set by Horde?
|
||||
// https://github.com/pimutils/vdirsyncer/issues/318
|
||||
component.remove("X-WR-CALNAME");
|
||||
// Those are from the VCARD specification and is supposed to change when the
|
||||
// item does -- however, we can determine that ourselves
|
||||
component.remove("REV");
|
||||
component.remove("LAST-MODIFIED");
|
||||
component.remove("CREATED");
|
||||
// Some iCalendar HTTP calendars generate the DTSTAMP at request time, so
|
||||
// this property always changes when the rest of the item didn't. Some do
|
||||
// the same with the UID.
|
||||
//
|
||||
// - Google's read-only calendar links
|
||||
// - http://www.feiertage-oesterreich.at/
|
||||
component.remove("DTSTAMP");
|
||||
component.remove("UID");
|
||||
|
||||
if component.name == "VCALENDAR" {
|
||||
component.subcomponents.retain(|ref c| c.name != "VTIMEZONE");
|
||||
}
|
||||
|
||||
stack.extend(component.subcomponents.iter_mut());
|
||||
}
|
||||
}
|
||||
|
||||
// FIXME: Possible optimization: Stream component to hasher instead of allocating new string
|
||||
let digest = ring::digest::digest(&ring::digest::SHA256, vobject::write_component(&new_c).as_bytes());
|
||||
let mut rv = String::new();
|
||||
for &byte in digest.as_ref() {
|
||||
write!(&mut rv, "{:x}", byte).unwrap();
|
||||
}
|
||||
rv
|
||||
}
|
||||
|
|
|
|||
|
|
@ -7,7 +7,7 @@ import random
|
|||
|
||||
import hypothesis.strategies as st
|
||||
|
||||
from vdirsyncer.vobject import Item, normalize_item
|
||||
from vdirsyncer.vobject import Item
|
||||
|
||||
import urllib3
|
||||
import urllib3.exceptions
|
||||
|
|
@ -20,7 +20,7 @@ def blow_up(*a, **kw):
|
|||
|
||||
|
||||
def assert_item_equals(a, b):
|
||||
assert normalize_item(a) == normalize_item(b)
|
||||
assert a.hash == b.hash
|
||||
|
||||
|
||||
VCARD_TEMPLATE = u'''BEGIN:VCARD
|
||||
|
|
|
|||
|
|
@ -15,7 +15,7 @@ from vdirsyncer.storage.base import normalize_meta_value
|
|||
from vdirsyncer.vobject import Item
|
||||
|
||||
from .. import EVENT_TEMPLATE, TASK_TEMPLATE, VCARD_TEMPLATE, \
|
||||
assert_item_equals, format_item, normalize_item, \
|
||||
assert_item_equals, format_item, \
|
||||
printable_characters_strategy
|
||||
|
||||
|
||||
|
|
@ -348,4 +348,4 @@ class StorageTests(object):
|
|||
href, etag = s.upload(item)
|
||||
|
||||
item2, etag2 = s.get(href)
|
||||
assert normalize_item(item) == normalize_item(item2)
|
||||
assert item.hash == item2.hash
|
||||
|
|
|
|||
|
|
@ -6,14 +6,8 @@ import os
|
|||
|
||||
import pytest
|
||||
|
||||
import requests
|
||||
import requests.exceptions
|
||||
|
||||
from tests import assert_item_equals
|
||||
|
||||
from vdirsyncer import exceptions
|
||||
from vdirsyncer.vobject import Item
|
||||
|
||||
from .. import StorageTests, get_server_mixin
|
||||
|
||||
|
||||
|
|
@ -24,14 +18,6 @@ ServerMixin = get_server_mixin(dav_server)
|
|||
class DAVStorageTests(ServerMixin, StorageTests):
|
||||
dav_server = dav_server
|
||||
|
||||
@pytest.mark.skipif(dav_server == 'radicale',
|
||||
reason='Radicale is very tolerant.')
|
||||
def test_dav_broken_item(self, s):
|
||||
item = Item(u'HAHA:YES')
|
||||
with pytest.raises((exceptions.Error, requests.exceptions.HTTPError)):
|
||||
s.upload(item)
|
||||
assert not list(s.list())
|
||||
|
||||
def test_dav_empty_get_multi_performance(self, s, monkeypatch):
|
||||
def breakdown(*a, **kw):
|
||||
raise AssertionError('Expected not to be called.')
|
||||
|
|
|
|||
|
|
@ -4,10 +4,9 @@ import pytest
|
|||
|
||||
from requests import Response
|
||||
|
||||
from tests import normalize_item
|
||||
|
||||
from vdirsyncer.exceptions import UserError
|
||||
from vdirsyncer.storage.http import HttpStorage, prepare_auth
|
||||
from vdirsyncer.vobject import Item
|
||||
|
||||
|
||||
def test_list(monkeypatch):
|
||||
|
|
@ -56,9 +55,9 @@ def test_list(monkeypatch):
|
|||
item, etag2 = s.get(href)
|
||||
assert item.uid is not None
|
||||
assert etag2 == etag
|
||||
found_items[normalize_item(item)] = href
|
||||
found_items[item.hash] = href
|
||||
|
||||
expected = set(normalize_item(u'BEGIN:VCALENDAR\n' + x + '\nEND:VCALENDAR')
|
||||
expected = set(Item(u'BEGIN:VCALENDAR\n' + x + '\nEND:VCALENDAR').hash
|
||||
for x in items)
|
||||
|
||||
assert set(found_items) == expected
|
||||
|
|
@ -67,7 +66,7 @@ def test_list(monkeypatch):
|
|||
item, etag2 = s.get(href)
|
||||
assert item.uid is not None
|
||||
assert etag2 == etag
|
||||
assert found_items[normalize_item(item)] == href
|
||||
assert found_items[item.hash] == href
|
||||
|
||||
|
||||
def test_readonly_param():
|
||||
|
|
|
|||
|
|
@ -9,7 +9,7 @@ from hypothesis.stateful import Bundle, RuleBasedStateMachine, rule
|
|||
import pytest
|
||||
|
||||
from tests import BARE_EVENT_TEMPLATE, EVENT_TEMPLATE, \
|
||||
EVENT_WITH_TIMEZONE_TEMPLATE, VCARD_TEMPLATE, normalize_item, \
|
||||
EVENT_WITH_TIMEZONE_TEMPLATE, VCARD_TEMPLATE, \
|
||||
uid_strategy
|
||||
|
||||
import vdirsyncer.vobject as vobject
|
||||
|
|
@ -31,8 +31,8 @@ _simple_joined = u'\r\n'.join(
|
|||
def test_split_collection_simple(benchmark):
|
||||
given = benchmark(lambda: list(vobject.split_collection(_simple_joined)))
|
||||
|
||||
assert [normalize_item(item) for item in given] == \
|
||||
[normalize_item(item) for item in _simple_split]
|
||||
assert [vobject.Item(item).hash for item in given] == \
|
||||
[vobject.Item(item).hash for item in _simple_split]
|
||||
|
||||
assert [x.splitlines() for x in given] == \
|
||||
[x.splitlines() for x in _simple_split]
|
||||
|
|
@ -47,8 +47,8 @@ def test_split_collection_multiple_wrappers(benchmark):
|
|||
)
|
||||
given = benchmark(lambda: list(vobject.split_collection(joined)))
|
||||
|
||||
assert [normalize_item(item) for item in given] == \
|
||||
[normalize_item(item) for item in _simple_split]
|
||||
assert [vobject.Item(item).hash for item in given] == \
|
||||
[vobject.Item(item).hash for item in _simple_split]
|
||||
|
||||
assert [x.splitlines() for x in given] == \
|
||||
[x.splitlines() for x in _simple_split]
|
||||
|
|
@ -56,7 +56,7 @@ def test_split_collection_multiple_wrappers(benchmark):
|
|||
|
||||
def test_join_collection_simple(benchmark):
|
||||
given = benchmark(lambda: vobject.join_collection(_simple_split))
|
||||
assert normalize_item(given) == normalize_item(_simple_joined)
|
||||
assert vobject.Item(given).hash == vobject.Item(_simple_joined).hash
|
||||
assert given.splitlines() == _simple_joined.splitlines()
|
||||
|
||||
|
||||
|
|
@ -123,12 +123,12 @@ def test_split_collection_timezones():
|
|||
[timezone, u'END:VCALENDAR']
|
||||
)
|
||||
|
||||
given = set(normalize_item(item)
|
||||
given = set(vobject.Item(item).hash
|
||||
for item in vobject.split_collection(full))
|
||||
expected = set(
|
||||
normalize_item(u'\r\n'.join((
|
||||
vobject.Item(u'\r\n'.join((
|
||||
u'BEGIN:VCALENDAR', item, timezone, u'END:VCALENDAR'
|
||||
)))
|
||||
))).hash
|
||||
for item in items
|
||||
)
|
||||
|
||||
|
|
@ -146,11 +146,11 @@ def test_split_contacts():
|
|||
with_wrapper.splitlines()
|
||||
|
||||
|
||||
def test_hash_item():
|
||||
def test_hash_item2():
|
||||
a = EVENT_TEMPLATE.format(r=1, uid=1)
|
||||
b = u'\n'.join(line for line in a.splitlines()
|
||||
if u'PRODID' not in line)
|
||||
assert vobject.hash_item(a) == vobject.hash_item(b)
|
||||
assert vobject.Item(a).hash == vobject.Item(b).hash
|
||||
|
||||
|
||||
def test_multiline_uid(benchmark):
|
||||
|
|
@ -351,3 +351,47 @@ def test_component_contains():
|
|||
|
||||
with pytest.raises(ValueError):
|
||||
42 in item
|
||||
|
||||
|
||||
def test_hash_item():
|
||||
item1 = vobject.Item(
|
||||
'BEGIN:FOO\r\n'
|
||||
'X-RADICALE-NAME:YES\r\n'
|
||||
'END:FOO\r\n'
|
||||
)
|
||||
|
||||
item2 = vobject.Item(
|
||||
'BEGIN:FOO\r\n'
|
||||
'X-RADICALE-NAME:NO\r\n'
|
||||
'END:FOO\r\n'
|
||||
)
|
||||
|
||||
assert item1.hash == item2.hash
|
||||
|
||||
item2 = vobject.Item(
|
||||
'BEGIN:FOO\r\n'
|
||||
'X-RADICALE-NAME:NO\r\n'
|
||||
'OTHER-PROP:YAY\r\n'
|
||||
'END:FOO\r\n'
|
||||
)
|
||||
|
||||
assert item1.hash != item2.hash
|
||||
|
||||
|
||||
def test_hash_item_timezones():
|
||||
item1 = vobject.Item(
|
||||
'BEGIN:VCALENDAR\r\n'
|
||||
'HELLO:HAHA\r\n'
|
||||
'BEGIN:VTIMEZONE\r\n'
|
||||
'PROP:YES\r\n'
|
||||
'END:VTIMEZONE\r\n'
|
||||
'END:VCALENDAR\r\n'
|
||||
)
|
||||
|
||||
item2 = vobject.Item(
|
||||
'BEGIN:VCALENDAR\r\n'
|
||||
'HELLO:HAHA\r\n'
|
||||
'END:VCALENDAR\r\n'
|
||||
)
|
||||
|
||||
assert item1.hash == item2.hash
|
||||
|
|
|
|||
|
|
@ -39,3 +39,7 @@ def _component_rv(c):
|
|||
|
||||
def clone_component(c):
|
||||
return _component_rv(lib.vdirsyncer_clone_component(c))
|
||||
|
||||
|
||||
def hash_component(c):
|
||||
return _string_rv(lib.vdirsyncer_hash_component(c))
|
||||
|
|
|
|||
|
|
@ -1,40 +1,11 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
|
||||
import hashlib
|
||||
from itertools import chain, tee
|
||||
|
||||
from .utils import cached_property, uniq
|
||||
from . import exceptions, native
|
||||
|
||||
|
||||
IGNORE_PROPS = (
|
||||
# PRODID is changed by radicale for some reason after upload
|
||||
'PRODID',
|
||||
# Sometimes METHOD:PUBLISH is added by WebCAL providers, for us it doesn't
|
||||
# make a difference
|
||||
'METHOD',
|
||||
# X-RADICALE-NAME is used by radicale, because hrefs don't really exist in
|
||||
# their filesystem backend
|
||||
'X-RADICALE-NAME',
|
||||
# Apparently this is set by Horde?
|
||||
# https://github.com/pimutils/vdirsyncer/issues/318
|
||||
'X-WR-CALNAME',
|
||||
# Those are from the VCARD specification and is supposed to change when the
|
||||
# item does -- however, we can determine that ourselves
|
||||
'REV',
|
||||
'LAST-MODIFIED',
|
||||
'CREATED',
|
||||
# Some iCalendar HTTP calendars generate the DTSTAMP at request time, so
|
||||
# this property always changes when the rest of the item didn't. Some do
|
||||
# the same with the UID.
|
||||
#
|
||||
# - Google's read-only calendar links
|
||||
# - http://www.feiertage-oesterreich.at/
|
||||
'DTSTAMP',
|
||||
'UID',
|
||||
)
|
||||
|
||||
|
||||
class Item(object):
|
||||
|
||||
'''Immutable wrapper class for VCALENDAR (VEVENT, VTODO) and
|
||||
|
|
@ -76,8 +47,11 @@ class Item(object):
|
|||
|
||||
@cached_property
|
||||
def hash(self):
|
||||
'''Hash of self.raw, used for etags.'''
|
||||
return hash_item(self.raw)
|
||||
'''Used for etags.'''
|
||||
if not self.is_valid:
|
||||
raise ValueError('Item malformed.')
|
||||
|
||||
return native.hash_component(self._component)
|
||||
|
||||
@cached_property
|
||||
def ident(self):
|
||||
|
|
@ -107,36 +81,6 @@ class Item(object):
|
|||
return bool(self._component)
|
||||
|
||||
|
||||
def normalize_item(item, ignore_props=IGNORE_PROPS):
|
||||
'''Create syntactically invalid mess that is equal for similar items.'''
|
||||
if not isinstance(item, Item):
|
||||
item = Item(item)
|
||||
|
||||
item = _strip_timezones(item)
|
||||
|
||||
x = _Component('TEMP', item.raw.splitlines(), [])
|
||||
for prop in IGNORE_PROPS:
|
||||
del x[prop]
|
||||
|
||||
x.props.sort()
|
||||
return u'\r\n'.join(filter(bool, (line.strip() for line in x.props)))
|
||||
|
||||
|
||||
def _strip_timezones(item):
|
||||
parsed = item.parsed
|
||||
if not parsed or parsed.name != 'VCALENDAR':
|
||||
return item
|
||||
|
||||
parsed.subcomponents = [c for c in parsed.subcomponents
|
||||
if c.name != 'VTIMEZONE']
|
||||
|
||||
return Item('\r\n'.join(parsed.dump_lines()))
|
||||
|
||||
|
||||
def hash_item(text):
|
||||
return hashlib.sha256(normalize_item(text).encode('utf-8')).hexdigest()
|
||||
|
||||
|
||||
def split_collection(text):
|
||||
assert isinstance(text, str)
|
||||
inline = []
|
||||
|
|
|
|||
Loading…
Reference in a new issue