Make XML parsing more tolerant of invalid characters

Fix #207
This commit is contained in:
Markus Unterwaditzer 2015-05-17 18:07:03 +02:00
parent 55be28c12b
commit 209648a936
2 changed files with 18 additions and 5 deletions

View file

@ -13,7 +13,7 @@ from tests import EVENT_TEMPLATE, TASK_TEMPLATE, VCARD_TEMPLATE
import vdirsyncer.exceptions as exceptions
from vdirsyncer.storage.base import Item
from vdirsyncer.storage.dav import CaldavStorage, CarddavStorage
from vdirsyncer.storage.dav import CaldavStorage, CarddavStorage, _parse_xml
from .. import StorageTests, format_item
@ -182,3 +182,10 @@ class TestCarddavStorage(DavStorageTests):
# Pytest fixture that supplies the item template for this test class: it
# returns VCARD_TEMPLATE (one of the templates imported from `tests`).
@pytest.fixture
def item_template(self):
return VCARD_TEMPLATE
def test_broken_xml(capsys):
    """A control character inside an element must not abort XML parsing.

    The parsed element still exposes its text, and a "partially invalid XML"
    warning is expected on the captured error stream.
    """
    root = _parse_xml(b'<h1>\x10haha</h1>')
    assert root.text == 'haha'

    # readouterr() yields (out, err); only the error stream matters here.
    _, captured_err = capsys.readouterr()
    assert 'partially invalid xml' in captured_err.lower()

View file

@ -42,12 +42,18 @@ class InvalidXMLResponse(exceptions.InvalidResponse):
# Parse a raw XML response body and return the resulting root element.
#
# NOTE(review): this span looks like a rendered diff whose +/- markers and
# indentation were stripped, so the pre-commit and post-commit bodies of
# _parse_xml are interleaved and the text is not runnable as written. The hunk
# header (-42,12 +42,18) is consistent with 4 removed and 10 added lines here;
# the per-line attributions below are inferred from that -- confirm against
# the repository before relying on them.
def _parse_xml(content):
# Presumably the removed side (next three lines): a strict parse in which any
# etree.Error was converted into InvalidXMLResponse.
try:
return etree.XML(content)
except etree.Error as e:
# Presumably the added side: parse with a parser created with recover=True
# and keep the parser around so its error_log can be inspected afterwards.
p = etree.XMLParser(recover=True)
rv = etree.XML(content, parser=p)
# A None result is treated as unparseable input and reported to the caller
# as InvalidXMLResponse.
if rv is None:
raise InvalidXMLResponse('Invalid XML encountered: {}\n'
'Double-check the URLs in your config.'
# Presumably removed: the old message was formatted with the caught exception.
.format(e))
# Presumably added: the new message is formatted with the parser's error log.
.format(p.error_log))
# A non-empty error log alongside a usable result: emit a warning through
# dav_logger and still return what was parsed.
if p.error_log:
dav_logger.warning('Partially invalid XML response, some of your '
'items may be corrupted. Check the debug log and '
'consider switching servers. ({})'
.format(p.error_log))
return rv
def _merge_xml(items):