From 209648a936a22f787723f3eca94fab2d73e13706 Mon Sep 17 00:00:00 2001 From: Markus Unterwaditzer Date: Sun, 17 May 2015 18:07:03 +0200 Subject: [PATCH] Make XML parsing more tolerant re invalid chars Fix #207 --- tests/storage/dav/test_main.py | 9 ++++++++- vdirsyncer/storage/dav.py | 14 ++++++++++---- 2 files changed, 18 insertions(+), 5 deletions(-) diff --git a/tests/storage/dav/test_main.py b/tests/storage/dav/test_main.py index f5e8100..91b7f94 100644 --- a/tests/storage/dav/test_main.py +++ b/tests/storage/dav/test_main.py @@ -13,7 +13,7 @@ from tests import EVENT_TEMPLATE, TASK_TEMPLATE, VCARD_TEMPLATE import vdirsyncer.exceptions as exceptions from vdirsyncer.storage.base import Item -from vdirsyncer.storage.dav import CaldavStorage, CarddavStorage +from vdirsyncer.storage.dav import CaldavStorage, CarddavStorage, _parse_xml from .. import StorageTests, format_item @@ -182,3 +182,10 @@ class TestCarddavStorage(DavStorageTests): @pytest.fixture def item_template(self): return VCARD_TEMPLATE + + +def test_broken_xml(capsys): + rv = _parse_xml(b'

\x10haha

') + assert rv.text == 'haha' + warnings = capsys.readouterr()[1] + assert 'partially invalid xml' in warnings.lower() diff --git a/vdirsyncer/storage/dav.py b/vdirsyncer/storage/dav.py index 99f5027..a91e704 100644 --- a/vdirsyncer/storage/dav.py +++ b/vdirsyncer/storage/dav.py @@ -42,12 +42,18 @@ class InvalidXMLResponse(exceptions.InvalidResponse): def _parse_xml(content): - try: - return etree.XML(content) - except etree.Error as e: + p = etree.XMLParser(recover=True) + rv = etree.XML(content, parser=p) + if rv is None: raise InvalidXMLResponse('Invalid XML encountered: {}\n' 'Double-check the URLs in your config.' - .format(e)) + .format(p.error_log)) + if p.error_log: + dav_logger.warning('Partially invalid XML response, some of your ' + 'items may be corrupted. Check the debug log and ' + 'consider switching servers. ({})' + .format(p.error_log)) + return rv def _merge_xml(items):