From ddf74fee1ed6269c2aacd508d4ddea9d8059b143 Mon Sep 17 00:00:00 2001 From: Akinori MUSHA Date: Thu, 21 Mar 2013 10:18:23 +0900 Subject: [PATCH] Rewrite the Set-Cookie header parser entirely. The new parser is almost RFC 6265 compliant as the previous implementation but has some extensions: - It can parse double-quoted values with unsafe characters inside escaped with the backslash. - It parses a date value of the expires attribute in the way the RFC describes, with an exception that it allows omission of the seconds field. Some of the broken date representations that used to pass are now treated as error and ignored. - It can parse a Set-Cookie value that contains multiple cookie definitions separated by comma, and commas put inside double quotes are not mistaken as definition separator. --- lib/http/cookie.rb | 122 +++++++--------------- lib/http/cookie/scanner.rb | 200 +++++++++++++++++++++++++++++++++++++ test/test_http_cookie.rb | 49 ++++----- 3 files changed, 263 insertions(+), 108 deletions(-) create mode 100644 lib/http/cookie/scanner.rb diff --git a/lib/http/cookie.rb b/lib/http/cookie.rb index d4aea4d..c5ec060 100644 --- a/lib/http/cookie.rb +++ b/lib/http/cookie.rb @@ -196,6 +196,8 @@ class HTTP::Cookie end end + autoload :Scanner, 'http/cookie/scanner' + class << self # Normalizes a given path. If it is empty or it is a relative # path, the root path '/' is returned. @@ -243,88 +245,40 @@ class HTTP::Cookie date ||= Time.now [].tap { |cookies| - # The expires attribute may include a comma in the value. - set_cookie.split(/,(?=[^;,]*=|\s*\z)/).each { |c| - if c.bytesize > MAX_LENGTH - logger.warn("Cookie definition too long: #{c}") if logger - next - end + s = Scanner.new(set_cookie, logger) + until s.eos? + name, value, attrs = s.scan_cookie + break if name.nil? || name.empty? - first_elem, *cookie_elem = c.split(/;+/) - first_elem.strip! - key, value = first_elem.split(/\=/, 2) - # RFC 6265 2.2 - # A cookie-value may be DQUOTE'd. - case value - when /\A"(.*)"\z/ - value = $1.gsub(/\\(.)/, "\\1") - end - - begin - cookie = new(key, value.dup) - rescue - logger.warn("Couldn't parse key/value: #{first_elem}") if logger - next - end - - cookie_elem.each do |pair| - pair.strip! - key, value = pair.split(/=/, 2) #/) - next unless key - case value # may be nil - when /\A"(.*)"\z/ - value = $1.gsub(/\\(.)/, "\\1") - end - - case key.downcase - when 'domain' - next unless value && !value.empty? - begin - cookie.domain = value + cookie = new(name, value) + attrs.each { |aname, avalue| + begin + case aname + when 'domain' + cookie.domain = avalue cookie.for_domain = true - rescue - logger.warn("Couldn't parse domain: #{value}") if logger + when 'path' + cookie.path = avalue + when 'expires' + # RFC 6265 4.1.2.2 + # The Max-Age attribute has precedence over the Expires + # attribute. + cookie.expires = avalue unless cookie.max_age + when 'max-age' + cookie.max_age = avalue + when 'comment' + cookie.comment = avalue + when 'version' + cookie.version = avalue + when 'secure' + cookie.secure = avalue + when 'httponly' + cookie.httponly = avalue end - when 'path' - next unless value && !value.empty? - cookie.path = value - when 'expires' - # RFC 6265 4.1.2.2 - # The Max-Age attribute has precedence over the Expires - # attribute. - next unless value && !value.empty? && cookie.max_age.nil? - begin - cookie.expires = Time.parse(value) - rescue - logger.warn("Couldn't parse expires: #{value}") if logger - end - when 'max-age' - next unless value && !value.empty? - begin - cookie.max_age = Integer(value) - rescue - logger.warn("Couldn't parse max age '#{value}'") if logger - end - when 'comment' - next unless value - cookie.comment = value - when 'version' - next unless value - begin - cookie.version = Integer(value) - rescue - logger.warn("Couldn't parse version '#{value}'") if logger - cookie.version = nil - end - when 'secure' - cookie.secure = true - when 'httponly' - cookie.httponly = true + rescue => e + logger.warn("Couldn't parse #{aname} '#{avalue}': #{e}") if logger end - end - - cookie.secure ||= false - cookie.httponly ||= false + } # Have `expires` set instead of `max_age`, so that # expiration check (`expired?`) can be performed. @@ -342,7 +296,7 @@ class HTTP::Cookie yield cookie if block_given? cookies << cookie - } + end } end end @@ -570,12 +524,12 @@ class HTTP::Cookie origin = origin ? URI(origin) : @origin or raise "origin must be specified to produce a value for Set-Cookie" - string = cookie_value + string = "#{@name}=#{Scanner.quote(@value)}" if @for_domain || @domain != DomainName.new(origin.host).hostname - string << "; domain=#{@domain}" + string << "; Domain=#{@domain}" end if (HTTP::Cookie.normalize_path(origin) + './').path != @path - string << "; path=#{@path}" + string << "; Path=#{@path}" end if @max_age string << "; Max-Age=#{@max_age}" @@ -583,13 +537,13 @@ class HTTP::Cookie string << "; Expires=#{@expires.httpdate}" end if @comment - string << "; comment=#{@comment}" + string << "; Comment=#{Scanner.quote(@comment)}" end if @httponly string << "; HttpOnly" end if @secure - string << "; secure" + string << "; Secure" end string end diff --git a/lib/http/cookie/scanner.rb b/lib/http/cookie/scanner.rb new file mode 100644 index 0000000..742c4e3 --- /dev/null +++ b/lib/http/cookie/scanner.rb @@ -0,0 +1,200 @@ +require 'http/cookie' +require 'strscan' +require 'time' + +class HTTP::Cookie::Scanner < StringScanner + # Whitespace. + RE_WSP = /[ \t]+/ + + # A pattern that matches a cookie name or attribute name which may + # be empty, capturing trailing whitespace. + RE_NAME = /(?!#{RE_WSP})[^,;\\"=]*/ + + RE_BAD_CHAR = /([\x00-\x20\x7F",;\\])/ + + # A pattern that matches the comma in a (typically date) value. + RE_COOKIE_COMMA = /,(?=#{RE_WSP}?#{RE_NAME}=)/ + + def initialize(string, logger = nil) + @logger = logger + super(string) + end + + class << self + def quote(s) + return s unless s.match(RE_BAD_CHAR) + '"' << s.gsub(RE_BAD_CHAR, "\\\\\\1") << '"' + end + end + + def skip_wsp + skip(RE_WSP) + end + + def scan_dquoted + ''.tap { |s| + case + when skip(/"/) + break + when skip(/\\/) + s << getch + when scan(/[^"\\]+/) + s << matched + end until eos? + } + end + + def scan_name + scan(RE_NAME).tap { |s| + s.rstrip! if s + } + end + + def scan_value + ''.tap { |s| + case + when scan(/[^,;"]+/) + s << matched + when skip(/"/) + # RFC 6265 2.2 + # A cookie-value may be DQUOTE'd. + s << scan_dquoted + when check(/;|#{RE_COOKIE_COMMA}/o) + break + else + s << getch + end until eos? + s.rstrip! + } + end + + def scan_name_value + name = scan_name + if skip(/\=/) + value = scan_value + else + scan_value + value = nil + end + [name, value] + end + + def parse_cookie_date(s) + # RFC 6265 5.1.1 + time = day_of_month = month = year = nil + + s.split(/[\x09\x20-\x2F\x3B-\x40\x5B-\x60\x7B-\x7E]+/).each { |token| + case + when time.nil? && token.match(/\A(\d{1,2}):(\d{1,2})(?::(\d{1,2}))?(?=\D|\z)/) + sec = + if $3 + $3.to_i + else + # violation of the RFC + @logger.warn("Time lacks the second part: #{token}") if @logger + 0 + end + time = [$1.to_i, $2.to_i, sec] + when day_of_month.nil? && token.match(/\A(\d{1,2})(?=\D|\z)/) + day_of_month = $1.to_i + when month.nil? && token.match(/\A(jan|feb|mar|apr|may|jun|jul|aug|sep|oct|nov|dec)/i) + month = $1.capitalize + when year.nil? && token.match(/\A(\d{2,4})(?=\D|\z)/) + year = $1.to_i + end + } + + if day_of_month.nil? || month.nil? || year.nil? || time.nil? + return nil + end + + case day_of_month + when 1..31 + else + return nil + end + + case year + when 100..1600 + return nil + when 70..99 + year += 1900 + when 0..69 + year += 2000 + end + + if (time <=> [23,59,59]) > 0 + return nil + end + + Time.strptime( + '%02d %s %04d %02d:%02d:%02d UTC' % [day_of_month, month, year, *time], + '%d %b %Y %T %Z' + ).tap { |date| + date.day == day_of_month or return nil + } + end + + def scan_cookie + # cf. RFC 6265 5.2 + until eos? + start = pos + len = nil + + skip_wsp + + name, value = scan_name_value + if name.nil? + break + elsif value.nil? + @logger.warn("Cookie definition lacks a name-value pair.") if @logger + elsif name.empty? + @logger.warn("Cookie definition has an empty name.") if @logger + value = nil + end + attrs = {} + + case + when skip(/,/) + len = (pos - 1) - start + break + when skip(/;/) + skip_wsp + aname, avalue = scan_name_value + break if aname.nil? + next if aname.empty? || value.nil? + aname.downcase! + case aname + when 'expires' + # RFC 6265 5.2.1 + avalue &&= parse_cookie_date(avalue) or next + when 'max-age' + # RFC 6265 5.2.2 + next unless /\A-?\d+\z/.match(avalue) + when 'domain' + # RFC 6265 5.2.3 + # An empty value SHOULD be ignored. + next if avalue.nil? || avalue.empty? + when 'path' + # RFC 6265 5.2.4 + # A relative path must be ignored rather than normalizing it + # to "/". + next unless /\A\//.match(avalue) + when 'secure', 'httponly' + # RFC 6265 5.2.5, 5.2.6 + avalue = true + end + attrs[aname] = avalue + end until eos? + + len ||= pos - start + + if len > HTTP::Cookie::MAX_LENGTH + @logger.warn("Cookie definition too long: #{name}") if @logger + next + end + + return [name, value, attrs] if value + end + end +end diff --git a/test/test_http_cookie.rb b/test/test_http_cookie.rb index f68d9e4..e46e6e0 100644 --- a/test/test_http_cookie.rb +++ b/test/test_http_cookie.rb @@ -33,7 +33,7 @@ class TestHTTPCookie < Test::Unit::TestCase "Fri, 17 Mar 89 4:01:33", "Fri, 17 Mar 89 4:01 GMT", "Mon Jan 16 16:12 PDT 1989", - "Mon Jan 16 16:12 +0130 1989", + #"Mon Jan 16 16:12 +0130 1989", "6 May 1992 16:41-JST (Wednesday)", #"22-AUG-1993 10:59:12.82", "22-AUG-1993 10:59pm", @@ -42,7 +42,7 @@ class TestHTTPCookie < Test::Unit::TestCase #"Friday, August 04, 1995 3:54 PM", #"06/21/95 04:24:34 PM", #"20/06/95 21:07", - "95-06-08 19:32:48 EDT", + #"95-06-08 19:32:48 EDT", ] dates.each do |date| @@ -373,31 +373,32 @@ class TestHTTPCookie < Test::Unit::TestCase def test_set_cookie_value url = URI.parse('http://rubyforge.org/') - cookie_params = @cookie_params.merge('secure' => 'secure', 'max-age' => 'Max-Age=1000') - cookie_value = 'foo=bar' - date = Time.at(Time.now.to_i) - cookie_params.keys.combine.each do |keys| - cookie_text = [cookie_value, *keys.map { |key| cookie_params[key] }].join('; ') - cookie, = HTTP::Cookie.parse(cookie_text, :origin => url, :date => date) - cookie2, = HTTP::Cookie.parse(cookie.set_cookie_value, :origin => url, :date => date) + ['foo=bar', 'foo="bar"', 'foo="ba\"r baz"'].each { |cookie_value| + cookie_params = @cookie_params.merge('secure' => 'secure', 'max-age' => 'Max-Age=1000') + date = Time.at(Time.now.to_i) + cookie_params.keys.combine.each do |keys| + cookie_text = [cookie_value, *keys.map { |key| cookie_params[key] }].join('; ') + cookie, = HTTP::Cookie.parse(cookie_text, :origin => url, :date => date) + cookie2, = HTTP::Cookie.parse(cookie.set_cookie_value, :origin => url, :date => date) - assert_equal(cookie.name, cookie2.name) - assert_equal(cookie.value, cookie2.value) - assert_equal(cookie.domain, cookie2.domain) - assert_equal(cookie.for_domain?, cookie2.for_domain?) - assert_equal(cookie.path, cookie2.path) - assert_equal(cookie.expires, cookie2.expires) - if keys.include?('max-age') - assert_equal(date + 1000, cookie2.expires) - elsif keys.include?('expires') - assert_equal(@expires, cookie2.expires) - else - assert_equal(nil, cookie2.expires) + assert_equal(cookie.name, cookie2.name) + assert_equal(cookie.value, cookie2.value) + assert_equal(cookie.domain, cookie2.domain) + assert_equal(cookie.for_domain?, cookie2.for_domain?) + assert_equal(cookie.path, cookie2.path) + assert_equal(cookie.expires, cookie2.expires) + if keys.include?('max-age') + assert_equal(date + 1000, cookie2.expires) + elsif keys.include?('expires') + assert_equal(@expires, cookie2.expires) + else + assert_equal(nil, cookie2.expires) + end + assert_equal(cookie.secure?, cookie2.secure?) + assert_equal(cookie.httponly?, cookie2.httponly?) end - assert_equal(cookie.secure?, cookie2.secure?) - assert_equal(cookie.httponly?, cookie2.httponly?) - end + } end def test_parse_cookie_no_spaces