Change the custom URI parser to be a bit more conservative

First try the default URI(), and if it fails relax the restrictions on
the path component as a fallback.
This commit is contained in:
Akinori MUSHA 2023-11-02 01:48:31 +09:00
parent 27cc46c1d7
commit 8930674adf
5 changed files with 45 additions and 58 deletions

View file

@ -276,7 +276,7 @@ class HTTP::Cookie
logger = options[:logger]
created_at = options[:created_at]
end
origin = HTTP::Cookie::URIParser.instance.convert_to_uri(origin)
origin = HTTP::Cookie::URIParser.parse(origin)
[].tap { |cookies|
Scanner.new(set_cookie, logger).scan_set_cookie { |name, value, attrs|
@ -456,7 +456,7 @@ class HTTP::Cookie
@origin.nil? or
raise ArgumentError, "origin cannot be changed once it is set"
# Delay setting @origin because #domain= or #path= may fail
origin = HTTP::Cookie::URIParser.instance.convert_to_uri(origin)
origin = HTTP::Cookie::URIParser.parse(origin)
if URI::HTTP === origin
self.domain ||= origin.host
self.path ||= (origin + './').path
@ -549,7 +549,7 @@ class HTTP::Cookie
# Tests if it is OK to accept this cookie if it is sent from a given
# URI/URL, `uri`.
def acceptable_from_uri?(uri)
uri = HTTP::Cookie::URIParser.instance.convert_to_uri(uri)
uri = HTTP::Cookie::URIParser.parse(uri)
return false unless URI::HTTP === uri && uri.host
host = DomainName.new(uri.host)
@ -586,7 +586,7 @@ class HTTP::Cookie
if @domain.nil?
raise "cannot tell if this cookie is valid because the domain is unknown"
end
uri = HTTP::Cookie::URIParser.instance.convert_to_uri(uri)
uri = HTTP::Cookie::URIParser.parse(uri)
# RFC 6265 5.4
return false if secure? && !(URI::HTTPS === uri)
acceptable_from_uri?(uri) && HTTP::Cookie.path_match?(@path, uri.path)

View file

@ -1,49 +1,36 @@
require 'singleton'
module HTTP::Cookie::URIParser
module_function
class HTTP::Cookie::URIParser
include Singleton
# Regular Expression taken from RFC 3986 Appendix B
URIREGEX = %r{
\A
(?: (?<scheme> [^:/?\#]+ ) : )?
(?: // (?<authority> [^/?\#]* ) )?
(?<path> [^?\#]* )
(?: \? (?<query> [^\#]* ) )?
(?: \# (?<fragment> .* ) )?
\z
}x
REGEXP = {
ABS_PATH: /\A[^?#]*\z/
}
def parse(uri)
m = /
\A
(?<scheme>https?)
:\/\/
((?<userinfo>.*)@)?
(?<host>[^\/]+)
(:(?<port>\d+))?
(?<path>[^?#]*)
(\?(?<query>[^#]*))?
(\#(?<fragment>.*))?
/xi.match(uri.to_s)
# Not an absolute HTTP/HTTPS URI
return URI::DEFAULT_PARSER.parse(uri) unless m
URI.scheme_list[m['scheme'].upcase].new(
m['scheme'],
m['userinfo'],
m['host'],
m['port'],
nil, # registry
m['path'],
nil, # opaque
m['query'],
m['fragment'],
self
)
# Escape RFC 3986 "reserved" characters minus valid characters for path
# More specifically, gen-delims minus "/" / "?" / "#"
def escape_path(path)
path.sub(/\A[^?#]+/) { |p| p.gsub(/[:\[\]@]+/) { |r| CGI.escape(r) } }
end
def convert_to_uri(uri)
if uri.is_a?(URI::Generic)
uri
elsif uri = String.try_convert(uri)
parse(uri)
else
# Parse a URI string or object, relaxing the constraints on the path component
def parse(uri)
URI(uri)
rescue URI::InvalidURIError
str = String.try_convert(uri) or
raise ArgumentError, "bad argument (expected URI object or URI string)"
end
m = URIREGEX.match(str) or raise
path = m[:path]
str[m.begin(:path)...m.end(:path)] = escape_path(path)
uri = URI.parse(str)
uri.__send__(:set_path, path)
uri
end
end

View file

@ -156,7 +156,7 @@ class HTTP::CookieJar
block_given? or return enum_for(__method__, uri)
if uri
uri = HTTP::Cookie::URIParser.instance.convert_to_uri(uri)
uri = HTTP::Cookie::URIParser.parse(uri)
return self unless URI::HTTP === uri && uri.host
end

View file

@ -948,7 +948,7 @@ class TestHTTPCookie < Test::Unit::TestCase
}
cookie = HTTP::Cookie.new('a', 'b')
cookie.origin = HTTP::Cookie::URIParser.instance.parse('http://example.com/path[]/')
cookie.origin = HTTP::Cookie::URIParser.parse('http://example.com/path[]/')
assert_equal '/path[]/', cookie.path
cookie = HTTP::Cookie.new('a', 'b', :domain => '.example.com')
@ -1033,14 +1033,14 @@ class TestHTTPCookie < Test::Unit::TestCase
]
},
HTTP::Cookie.parse('a4=b; domain=example.com; path=/dir2[]/',
HTTP::Cookie::URIParser.instance.parse('http://example.com/dir[]/file.html')).first => {
HTTP::Cookie::URIParser.parse('http://example.com/dir[]/file.html')).first => {
true => [
HTTP::Cookie::URIParser.instance.parse('https://example.com/dir2[]/file.html'),
HTTP::Cookie::URIParser.instance.parse('http://example.com/dir2[]/file.html'),
HTTP::Cookie::URIParser.parse('https://example.com/dir2[]/file.html'),
HTTP::Cookie::URIParser.parse('http://example.com/dir2[]/file.html'),
],
false => [
HTTP::Cookie::URIParser.instance.parse('https://example.com/dir[]/file.html'),
HTTP::Cookie::URIParser.instance.parse('http://example.com/dir[]/file.html'),
HTTP::Cookie::URIParser.parse('https://example.com/dir[]/file.html'),
HTTP::Cookie::URIParser.parse('http://example.com/dir[]/file.html'),
'file:///dir2/test.html',
]
},
@ -1091,7 +1091,7 @@ class TestHTTPCookie < Test::Unit::TestCase
hash.each { |expected, urls|
urls.each { |url|
assert_equal expected, cookie.valid_for_uri?(url), '%s: %s' % [cookie.name, url]
assert_equal expected, cookie.valid_for_uri?(HTTP::Cookie::URIParser.instance.parse(url)), "%s: URI(%s)" % [cookie.name, url]
assert_equal expected, cookie.valid_for_uri?(HTTP::Cookie::URIParser.parse(url)), "%s: URI(%s)" % [cookie.name, url]
}
}
}

View file

@ -465,7 +465,7 @@ module TestHTTPCookieJar
end
def test_save_and_read_cookiestxt
url = HTTP::Cookie::URIParser.instance.convert_to_uri('https://rubyforge.org/foo[]/')
url = HTTP::Cookie::URIParser.parse('https://rubyforge.org/foo[]/')
# Add one cookie with an expiration date in the future
cookie = HTTP::Cookie.new(cookie_values)
@ -657,7 +657,7 @@ module TestHTTPCookieJar
end
def test_non_rfc3986_compliant_paths
url = HTTP::Cookie::URIParser.instance.convert_to_uri('http://RubyForge.org/login[]')
url = HTTP::Cookie::URIParser.parse('http://RubyForge.org/login[]')
values = cookie_values(:path => "/login[]", :expires => nil, :origin => url)
@ -671,8 +671,8 @@ module TestHTTPCookieJar
assert_equal(2, @jar.cookies(url).length)
# Make sure we don't get the cookie in a different path
assert_equal(0, @jar.cookies(HTTP::Cookie::URIParser.instance.convert_to_uri('http://RubyForge.org/hello[]')).length)
assert_equal(0, @jar.cookies(HTTP::Cookie::URIParser.instance.convert_to_uri('http://RubyForge.org/')).length)
assert_equal(0, @jar.cookies(HTTP::Cookie::URIParser.parse('http://RubyForge.org/hello[]')).length)
assert_equal(0, @jar.cookies(HTTP::Cookie::URIParser.parse('http://RubyForge.org/')).length)
# Expire the first cookie
@jar.add(HTTP::Cookie.new(values.merge( :expires => Time.now - (10 * 86400))))