Change the custom URI parser to be a bit more conservative

First try the default URI(), and if it fails relax the restrictions on
the path component as a fallback.
This commit is contained in:
Akinori MUSHA 2023-11-02 01:48:31 +09:00
parent 27cc46c1d7
commit 8930674adf
5 changed files with 45 additions and 58 deletions

View file

@ -276,7 +276,7 @@ class HTTP::Cookie
logger = options[:logger] logger = options[:logger]
created_at = options[:created_at] created_at = options[:created_at]
end end
origin = HTTP::Cookie::URIParser.instance.convert_to_uri(origin) origin = HTTP::Cookie::URIParser.parse(origin)
[].tap { |cookies| [].tap { |cookies|
Scanner.new(set_cookie, logger).scan_set_cookie { |name, value, attrs| Scanner.new(set_cookie, logger).scan_set_cookie { |name, value, attrs|
@ -456,7 +456,7 @@ class HTTP::Cookie
@origin.nil? or @origin.nil? or
raise ArgumentError, "origin cannot be changed once it is set" raise ArgumentError, "origin cannot be changed once it is set"
# Delay setting @origin because #domain= or #path= may fail # Delay setting @origin because #domain= or #path= may fail
origin = HTTP::Cookie::URIParser.instance.convert_to_uri(origin) origin = HTTP::Cookie::URIParser.parse(origin)
if URI::HTTP === origin if URI::HTTP === origin
self.domain ||= origin.host self.domain ||= origin.host
self.path ||= (origin + './').path self.path ||= (origin + './').path
@ -549,7 +549,7 @@ class HTTP::Cookie
# Tests if it is OK to accept this cookie if it is sent from a given # Tests if it is OK to accept this cookie if it is sent from a given
# URI/URL, `uri`. # URI/URL, `uri`.
def acceptable_from_uri?(uri) def acceptable_from_uri?(uri)
uri = HTTP::Cookie::URIParser.instance.convert_to_uri(uri) uri = HTTP::Cookie::URIParser.parse(uri)
return false unless URI::HTTP === uri && uri.host return false unless URI::HTTP === uri && uri.host
host = DomainName.new(uri.host) host = DomainName.new(uri.host)
@ -586,7 +586,7 @@ class HTTP::Cookie
if @domain.nil? if @domain.nil?
raise "cannot tell if this cookie is valid because the domain is unknown" raise "cannot tell if this cookie is valid because the domain is unknown"
end end
uri = HTTP::Cookie::URIParser.instance.convert_to_uri(uri) uri = HTTP::Cookie::URIParser.parse(uri)
# RFC 6265 5.4 # RFC 6265 5.4
return false if secure? && !(URI::HTTPS === uri) return false if secure? && !(URI::HTTPS === uri)
acceptable_from_uri?(uri) && HTTP::Cookie.path_match?(@path, uri.path) acceptable_from_uri?(uri) && HTTP::Cookie.path_match?(@path, uri.path)

View file

@ -1,49 +1,36 @@
require 'singleton' module HTTP::Cookie::URIParser
module_function
class HTTP::Cookie::URIParser # Regular Expression taken from RFC 3986 Appendix B
include Singleton URIREGEX = %r{
\A
(?: (?<scheme> [^:/?\#]+ ) : )?
(?: // (?<authority> [^/?\#]* ) )?
(?<path> [^?\#]* )
(?: \? (?<query> [^\#]* ) )?
(?: \# (?<fragment> .* ) )?
\z
}x
REGEXP = { # Escape RFC 3986 "reserved" characters minus valid characters for path
ABS_PATH: /\A[^?#]*\z/ # More specifically, gen-delims minus "/" / "?" / "#"
} def escape_path(path)
path.sub(/\A[^?#]+/) { |p| p.gsub(/[:\[\]@]+/) { |r| CGI.escape(r) } }
def parse(uri)
m = /
\A
(?<scheme>https?)
:\/\/
((?<userinfo>.*)@)?
(?<host>[^\/]+)
(:(?<port>\d+))?
(?<path>[^?#]*)
(\?(?<query>[^#]*))?
(\#(?<fragment>.*))?
/xi.match(uri.to_s)
# Not an absolute HTTP/HTTPS URI
return URI::DEFAULT_PARSER.parse(uri) unless m
URI.scheme_list[m['scheme'].upcase].new(
m['scheme'],
m['userinfo'],
m['host'],
m['port'],
nil, # registry
m['path'],
nil, # opaque
m['query'],
m['fragment'],
self
)
end end
def convert_to_uri(uri) # Parse a URI string or object, relaxing the constraints on the path component
if uri.is_a?(URI::Generic) def parse(uri)
uri URI(uri)
elsif uri = String.try_convert(uri) rescue URI::InvalidURIError
parse(uri) str = String.try_convert(uri) or
else
raise ArgumentError, "bad argument (expected URI object or URI string)" raise ArgumentError, "bad argument (expected URI object or URI string)"
end
m = URIREGEX.match(str) or raise
path = m[:path]
str[m.begin(:path)...m.end(:path)] = escape_path(path)
uri = URI.parse(str)
uri.__send__(:set_path, path)
uri
end end
end end

View file

@ -156,7 +156,7 @@ class HTTP::CookieJar
block_given? or return enum_for(__method__, uri) block_given? or return enum_for(__method__, uri)
if uri if uri
uri = HTTP::Cookie::URIParser.instance.convert_to_uri(uri) uri = HTTP::Cookie::URIParser.parse(uri)
return self unless URI::HTTP === uri && uri.host return self unless URI::HTTP === uri && uri.host
end end

View file

@ -948,7 +948,7 @@ class TestHTTPCookie < Test::Unit::TestCase
} }
cookie = HTTP::Cookie.new('a', 'b') cookie = HTTP::Cookie.new('a', 'b')
cookie.origin = HTTP::Cookie::URIParser.instance.parse('http://example.com/path[]/') cookie.origin = HTTP::Cookie::URIParser.parse('http://example.com/path[]/')
assert_equal '/path[]/', cookie.path assert_equal '/path[]/', cookie.path
cookie = HTTP::Cookie.new('a', 'b', :domain => '.example.com') cookie = HTTP::Cookie.new('a', 'b', :domain => '.example.com')
@ -1033,14 +1033,14 @@ class TestHTTPCookie < Test::Unit::TestCase
] ]
}, },
HTTP::Cookie.parse('a4=b; domain=example.com; path=/dir2[]/', HTTP::Cookie.parse('a4=b; domain=example.com; path=/dir2[]/',
HTTP::Cookie::URIParser.instance.parse('http://example.com/dir[]/file.html')).first => { HTTP::Cookie::URIParser.parse('http://example.com/dir[]/file.html')).first => {
true => [ true => [
HTTP::Cookie::URIParser.instance.parse('https://example.com/dir2[]/file.html'), HTTP::Cookie::URIParser.parse('https://example.com/dir2[]/file.html'),
HTTP::Cookie::URIParser.instance.parse('http://example.com/dir2[]/file.html'), HTTP::Cookie::URIParser.parse('http://example.com/dir2[]/file.html'),
], ],
false => [ false => [
HTTP::Cookie::URIParser.instance.parse('https://example.com/dir[]/file.html'), HTTP::Cookie::URIParser.parse('https://example.com/dir[]/file.html'),
HTTP::Cookie::URIParser.instance.parse('http://example.com/dir[]/file.html'), HTTP::Cookie::URIParser.parse('http://example.com/dir[]/file.html'),
'file:///dir2/test.html', 'file:///dir2/test.html',
] ]
}, },
@ -1091,7 +1091,7 @@ class TestHTTPCookie < Test::Unit::TestCase
hash.each { |expected, urls| hash.each { |expected, urls|
urls.each { |url| urls.each { |url|
assert_equal expected, cookie.valid_for_uri?(url), '%s: %s' % [cookie.name, url] assert_equal expected, cookie.valid_for_uri?(url), '%s: %s' % [cookie.name, url]
assert_equal expected, cookie.valid_for_uri?(HTTP::Cookie::URIParser.instance.parse(url)), "%s: URI(%s)" % [cookie.name, url] assert_equal expected, cookie.valid_for_uri?(HTTP::Cookie::URIParser.parse(url)), "%s: URI(%s)" % [cookie.name, url]
} }
} }
} }

View file

@ -465,7 +465,7 @@ module TestHTTPCookieJar
end end
def test_save_and_read_cookiestxt def test_save_and_read_cookiestxt
url = HTTP::Cookie::URIParser.instance.convert_to_uri('https://rubyforge.org/foo[]/') url = HTTP::Cookie::URIParser.parse('https://rubyforge.org/foo[]/')
# Add one cookie with an expiration date in the future # Add one cookie with an expiration date in the future
cookie = HTTP::Cookie.new(cookie_values) cookie = HTTP::Cookie.new(cookie_values)
@ -657,7 +657,7 @@ module TestHTTPCookieJar
end end
def test_non_rfc3986_compliant_paths def test_non_rfc3986_compliant_paths
url = HTTP::Cookie::URIParser.instance.convert_to_uri('http://RubyForge.org/login[]') url = HTTP::Cookie::URIParser.parse('http://RubyForge.org/login[]')
values = cookie_values(:path => "/login[]", :expires => nil, :origin => url) values = cookie_values(:path => "/login[]", :expires => nil, :origin => url)
@ -671,8 +671,8 @@ module TestHTTPCookieJar
assert_equal(2, @jar.cookies(url).length) assert_equal(2, @jar.cookies(url).length)
# Make sure we don't get the cookie in a different path # Make sure we don't get the cookie in a different path
assert_equal(0, @jar.cookies(HTTP::Cookie::URIParser.instance.convert_to_uri('http://RubyForge.org/hello[]')).length) assert_equal(0, @jar.cookies(HTTP::Cookie::URIParser.parse('http://RubyForge.org/hello[]')).length)
assert_equal(0, @jar.cookies(HTTP::Cookie::URIParser.instance.convert_to_uri('http://RubyForge.org/')).length) assert_equal(0, @jar.cookies(HTTP::Cookie::URIParser.parse('http://RubyForge.org/')).length)
# Expire the first cookie # Expire the first cookie
@jar.add(HTTP::Cookie.new(values.merge( :expires => Time.now - (10 * 86400)))) @jar.add(HTTP::Cookie.new(values.merge( :expires => Time.now - (10 * 86400))))