From 2e7e31fdbfb1a9b60551e388df2b218a5b838dff Mon Sep 17 00:00:00 2001 From: euxane Date: Sun, 1 Sep 2024 15:02:55 +0200 Subject: [PATCH] storage/http: add support for `filter_hook` This allows users to process fetched items through a filter command, to fix malformed webcal items as they are imported. In my case, my provider adds the export time to the description and random sequence numbers to all events. This caused the whole collection to be invalidated and propagated at each sync. I use the filter to remove those, canonicalising the items. --- CHANGELOG.rst | 1 + docs/config.rst | 6 ++++++ vdirsyncer/storage/http.py | 28 ++++++++++++++++++++++++++-- 3 files changed, 33 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index f0ded6a..b43a33d 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -18,6 +18,7 @@ Version 0.19.3 - Require matching ``BEGIN`` and ``END`` lines in vobjects. :gh:`1103` - A Docker environment for Vdirsyncer has been added `Vdirsyncer DOCKERIZED `_. - Implement digest auth. :gh:`1137` +- Add ``filter_hook`` parameter to :storage:`http`. :gh:`1136` Version 0.19.2 ============== diff --git a/docs/config.rst b/docs/config.rst index d157be3..a351f5c 100644 --- a/docs/config.rst +++ b/docs/config.rst @@ -484,6 +484,7 @@ leads to an error. [storage holidays_remote] type = "http" url = https://example.com/holidays_from_hicksville.ics + #filter_hook = null Too many WebCAL providers generate UIDs of all ``VEVENT``-components on-the-fly, i.e. all UIDs change every time the calendar is downloaded. @@ -508,3 +509,8 @@ leads to an error. :param auth_cert: Optional. Either a path to a certificate with a client certificate and the key or a list of paths to the files with them. :param useragent: Default ``vdirsyncer``. + :param filter_hook: Optional. A filter command to call for each fetched + item, passed in raw form to stdin and returned via stdout. + If nothing is returned by the filter command, the item is skipped. 
+        This can be used to alter fields as needed when dealing with providers
+        generating malformed events.
diff --git a/vdirsyncer/storage/http.py b/vdirsyncer/storage/http.py
index 41d94e8..9c4ce40 100644
--- a/vdirsyncer/storage/http.py
+++ b/vdirsyncer/storage/http.py
@@ -1,5 +1,7 @@
 from __future__ import annotations
 
+import logging
+import subprocess
 import urllib.parse as urlparse
 
 import aiohttp
@@ -14,6 +16,8 @@ from ..vobject import Item
 from ..vobject import split_collection
 from .base import Storage
 
+logger = logging.getLogger(__name__)
+
 
 class HttpStorage(Storage):
     storage_name = "http"
@@ -34,6 +38,7 @@ class HttpStorage(Storage):
         useragent=USERAGENT,
         verify_fingerprint=None,
         auth_cert=None,
+        filter_hook=None,
         *,
         connector,
         **kwargs,
@@ -56,6 +61,7 @@ class HttpStorage(Storage):
         self.useragent = useragent
         assert connector is not None
         self.connector = connector
+        self._filter_hook = filter_hook
 
         collection = kwargs.get("collection")
         if collection is not None:
@@ -66,6 +72,36 @@ class HttpStorage(Storage):
     def _default_headers(self):
         return {"User-Agent": self.useragent}
 
+    def _run_filter_hook(self, raw_item):
+        """Pipe ``raw_item`` through the ``filter_hook`` command.
+
+        The item is written to the command's stdin and its stdout is
+        returned, so an empty string means the hook asked to skip the item.
+        If the command cannot be started or exits with a non-zero status,
+        a warning is logged and ``raw_item`` is returned unchanged, so that
+        a broken hook never silently drops items.
+        """
+        try:
+            result = subprocess.run(
+                [self._filter_hook],
+                input=raw_item,
+                capture_output=True,
+                encoding="utf-8",
+            )
+        except OSError as e:
+            logger.warning("Error executing external command: %s", e)
+            return raw_item
+
+        if result.returncode != 0:
+            logger.warning(
+                "Filter hook exited with status %d: %s",
+                result.returncode,
+                result.stderr.strip(),
+            )
+            return raw_item
+
+        return result.stdout
+
     async def list(self):
         async with aiohttp.ClientSession(
             connector=self.connector,
@@ -82,8 +118,14 @@ class HttpStorage(Storage):
             )
         self._items = {}
 
-        for item in split_collection((await r.read()).decode("utf-8")):
-            item = Item(item)
+        for raw_item in split_collection((await r.read()).decode("utf-8")):
+            if self._filter_hook:
+                raw_item = self._run_filter_hook(raw_item)
+            # An empty result means the hook asked for this item to be skipped.
+            if not raw_item:
+                continue
+
+            item = Item(raw_item)
             if self._ignore_uids:
                 item = item.with_uid(item.hash)
 