From ec2f743ffab8089fd49124b2bb26a5296244f8f1 Mon Sep 17 00:00:00 2001 From: Markus Unterwaditzer Date: Fri, 22 Apr 2016 22:30:48 +0200 Subject: [PATCH] Replace LXML with stdlib XML Just because the XML module is already there, and suits our needs as well. DoS concerns should be irrelevant as we trust the server to that extent. --- CHANGELOG.rst | 3 +++ setup.py | 11 ---------- tests/storage/dav/test_utils.py | 10 --------- vdirsyncer/storage/dav.py | 38 ++++++++++++++------------------- 4 files changed, 19 insertions(+), 43 deletions(-) delete mode 100644 tests/storage/dav/test_utils.py diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 9e152e9..29c7ed0 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -18,6 +18,9 @@ Version 0.10.0 file. See :gh:`409`. - The ``collections`` parameter can now be used to synchronize differently-named collections with each other. +- **Packagers:** The ``lxml`` dependency has been dropped. +- XML parsing is now a lot stricter. Malfunctioning servers that used to work + with vdirsyncer may stop working. Version 0.9.3 ============= diff --git a/setup.py b/setup.py index de1b219..c4536ac 100644 --- a/setup.py +++ b/setup.py @@ -10,8 +10,6 @@ Vdirsyncer is a synchronization tool for vdir. See the README for more details. # detectable for setuptools-scm. Rather use the PyPI ones. -import platform - from setuptools import Command, find_packages, setup @@ -28,15 +26,6 @@ requirements = [ # replicate vdirsyncer's current behavior (verifying fingerprints without # verifying against CAs) with older versions of urllib3. 'requests >=2.4.1, !=2.9.0', - 'lxml >=3.1' + ( - # See https://github.com/pimutils/vdirsyncer/issues/298 - # We pin some LXML version that is known to work with PyPy - # I assume nobody actually uses PyPy with vdirsyncer, so this is - # moot - ', <=3.4.4' - if platform.python_implementation() == 'PyPy' - else '' - ), # https://github.com/sigmavirus24/requests-toolbelt/pull/28 # And https://github.com/sigmavirus24/requests-toolbelt/issues/54 'requests_toolbelt >=0.4.0', diff --git a/tests/storage/dav/test_utils.py b/tests/storage/dav/test_utils.py deleted file mode 100644 index b9b3ed9..0000000 --- a/tests/storage/dav/test_utils.py +++ /dev/null @@ -1,10 +0,0 @@ -# -*- coding: utf-8 -*- - -from vdirsyncer.storage.dav import _parse_xml - - -def test_broken_xml(capsys): - rv = _parse_xml(b'

\x10haha

') - assert rv.text == 'haha' - warnings = capsys.readouterr()[1] - assert 'partially invalid xml' in warnings.lower() diff --git a/vdirsyncer/storage/dav.py b/vdirsyncer/storage/dav.py index d8c9fe5..77af2a2 100644 --- a/vdirsyncer/storage/dav.py +++ b/vdirsyncer/storage/dav.py @@ -3,7 +3,7 @@ import datetime import logging -from lxml import etree +import xml.etree.ElementTree as etree import requests from requests.exceptions import HTTPError @@ -69,18 +69,12 @@ class InvalidXMLResponse(exceptions.InvalidResponse): def _parse_xml(content): - p = etree.XMLParser(recover=True) - rv = etree.XML(content, parser=p) - if rv is None: + try: + return etree.XML(content) + except etree.ParseError as e: raise InvalidXMLResponse('Invalid XML encountered: {}\n' 'Double-check the URLs in your config.' - .format(p.error_log)) - if p.error_log: - dav_logger.warning('Partially invalid XML response, some of your ' - 'items may be corrupted. Check the debug log and ' - 'consider switching servers. ({})' - .format(p.error_log)) - return rv + .format(e)) def _merge_xml(items): @@ -177,7 +171,7 @@ class Discover(object): root = etree.fromstring(response.content) # Better don't do string formatting here, because of XML namespaces - rv = root.find('.//' + self._homeset_tag + '/{*}href') + rv = root.find('.//' + self._homeset_tag + '/{DAV:}href') if rv is None: raise InvalidXMLResponse() return utils.compat.urlparse.urljoin(response.url, rv.text) @@ -192,11 +186,11 @@ class Discover(object): root = _parse_xml(r.content) done = set() for response in root.findall('{DAV:}response'): - props = _merge_xml(response.findall('{*}propstat/{*}prop')) - if props.find('{*}resourcetype/{*}' + self._resourcetype) is None: + props = _merge_xml(response.findall('{DAV:}propstat/{DAV:}prop')) + if props.find('{DAV:}resourcetype/' + self._resourcetype) is None: continue - href = response.find('{*}href') + href = response.find('{DAV:}href') if href is None: raise InvalidXMLResponse() href = utils.compat.urlparse.urljoin(r.url, href.text) @@ -261,7 +255,7 @@ class Discover(object): class CalDiscover(Discover): _namespace = 'urn:ietf:params:xml:ns:caldav' - _resourcetype = 'calendar' + _resourcetype = '{%s}calendar' % _namespace _homeset_xml = """ @@ -269,13 +263,13 @@ class CalDiscover(Discover): """ - _homeset_tag = '{*}calendar-home-set' + _homeset_tag = '{%s}calendar-home-set' % _namespace _well_known_uri = '/.well-known/caldav/' class CardDiscover(Discover): _namespace = 'urn:ietf:params:xml:ns:carddav' - _resourcetype = 'addressbook' + _resourcetype = '{%s}addressbook' % _namespace _homeset_xml = """ @@ -283,7 +277,7 @@ class CardDiscover(Discover): """ - _homeset_tag = '{*}addressbook-home-set' + _homeset_tag = '{%s}addressbook-home-set' % _namespace _well_known_uri = '/.well-known/carddav/' @@ -581,7 +575,7 @@ class DavStorage(Storage): except KeyError: raise exceptions.UnsupportedMetadataError() - lxml_selector = '{%s}%s' % (namespace, tagname) + xpath = '{%s}%s' % (namespace, tagname) data = ''' @@ -589,7 +583,7 @@ class DavStorage(Storage): '''.format( - to_native(etree.tostring(etree.Element(lxml_selector))) + to_native(etree.tostring(etree.Element(xpath))) ) headers = self.session.get_default_headers() @@ -602,7 +596,7 @@ class DavStorage(Storage): root = _parse_xml(response.content) - for prop in root.findall('.//' + lxml_selector): + for prop in root.findall('.//' + xpath): text = normalize_meta_value(getattr(prop, 'text', None)) if text: return text