From 6a29c9731452dd695bf3d9c096a674699aa735f4 Mon Sep 17 00:00:00 2001 From: Sami Samhuri Date: Sat, 7 Feb 2026 14:35:26 -0800 Subject: [PATCH] Expand parser coverage and harden substitution semantics --- ruby/shell/builtins.rb | 4 +- ruby/shell/repl.rb | 2 +- ruby/shell/word_expander.rb | 164 +++++++++++++++++++++++++++++------- ruby/test/shell_test.rb | 43 ++++++++++ 4 files changed, 180 insertions(+), 33 deletions(-) diff --git a/ruby/shell/builtins.rb b/ruby/shell/builtins.rb index dc58b36..edf9e7e 100644 --- a/ruby/shell/builtins.rb +++ b/ruby/shell/builtins.rb @@ -3,6 +3,8 @@ require "shell/logger" module Shell class Builtins + EXPORT_VARIABLE_PATTERN = /\$\w+/ + attr_reader :job_control, :logger def initialize(job_control: nil, logger: nil) @@ -83,7 +85,7 @@ module Shell logger.warn "#{red("[ERROR]")} Invalid export command" return -1 else - ENV[name] = value_parts.join("=").gsub(/\$\w+/) { |m| ENV[m[1..]] || "" } + ENV[name] = value_parts.join("=").gsub(EXPORT_VARIABLE_PATTERN) { |m| ENV[m[1..]] || "" } end 0 end diff --git a/ruby/shell/repl.rb b/ruby/shell/repl.rb index 940ec13..e236df3 100644 --- a/ruby/shell/repl.rb +++ b/ruby/shell/repl.rb @@ -70,7 +70,7 @@ module Shell end end result - rescue StandardError => e + rescue => e warn "#{red("[ERROR]")} #{e.message}" -1 end diff --git a/ruby/shell/word_expander.rb b/ruby/shell/word_expander.rb index d387abd..9889289 100644 --- a/ruby/shell/word_expander.rb +++ b/ruby/shell/word_expander.rb @@ -5,10 +5,26 @@ require "shell/string_parser" module Shell class WordExpander - ENV_VAR_REGEX = /\$(?:\{([^}]+)\}|(\w+)\b)/ - DEFAULT_VAR_REGEX = /\A(\w+):-([\s\S]*)\z/ ESCAPED_DOLLAR = "\u0001" ESCAPED_BACKTICK = "\u0002" + GLOB_PATTERN = /[*?\[]/ + SHELLSPLIT_PATTERN = /\G\s*(?>([^\0\s\\'"]+)|'([^\0']*)'|"((?:[^\0"\\]|\\[^\0])*)"|(\\[^\0]?)|(\S))(\s|\z)?/m + DOUBLE_QUOTE_ESCAPES_PATTERN = /\\([$`"\\\n])/ + SINGLE_ESCAPE_PATTERN = /\\(.)/ + TILDE_PREFIX_PATTERN = /^~([^\/]*)/ + VARIABLE_FIRST_CHAR_PATTERN = /[A-Za-z_]/ + VARIABLE_CHAR_PATTERN = /[A-Za-z0-9_]/ + TRAILING_NEWLINES_PATTERN = /\n+\z/ + ESCAPE_DOUBLE_QUOTED_SUBSTITUTION_PATTERN = /([\\"])/ + ESCAPE_UNQUOTED_SUBSTITUTION_PATTERN = /(\\|["'])/ + WHITESPACE_PATTERN = /\s/ + DIGIT_PATTERN = /\d/ + ARITHMETIC_IDENTIFIER_FIRST_PATTERN = /[A-Za-z_]/ + ARITHMETIC_IDENTIFIER_PATTERN = /[A-Za-z0-9_]/ + ARITHMETIC_OPERATOR_PATTERN = %r{[+\-*/()%]} + BRACE_EXPANSION_PATTERN = /(.*?)\{([^{}]*)\}(.*)/ + SplitWord = Struct.new(:text, :globbed, keyword_init: true) + CommandSubstitutionError = Class.new(StandardError) # Splits the given line into multiple words, performing the following transformations: # @@ -19,12 +35,22 @@ module Shell def expand(line) protected_line = protect_escaped_dollars(line) substituted_line = expand_command_substitution(protected_line) - shellsplit(substituted_line) + shellsplit_tokens(substituted_line) .flat_map do |word| - expanded = expand_variables(word) + expanded = expand_variables(word.text) .tr(ESCAPED_DOLLAR, "$") .tr(ESCAPED_BACKTICK, "`") - expand_braces(expanded) + expand_braces(expanded).map { |part| SplitWord.new(text: part, globbed: word.globbed) } + end + .flat_map do |word| + if word.globbed + [word.text] + elsif GLOB_PATTERN.match?(word.text) + glob_words = expand_globs(word.text) + glob_words.empty? ? [word.text] : glob_words + else + [word.text] + end end end @@ -51,11 +77,15 @@ module Shell # argv = 'here are "two words"'.shellsplit # argv #=> ["here", "are", "two words"] def shellsplit(line) + shellsplit_tokens(line).map(&:text) + end + + def shellsplit_tokens(line) words = [] field = "".dup at_word_start = true found_glob_char = false - line.scan(/\G\s*(?>([^\0\s\\'"]+)|'([^\0']*)'|"((?:[^\0"\\]|\\[^\0])*)"|(\\[^\0]?)|(\S))(\s|\z)?/m) do |word, sq, dq, esc, garbage, sep| + line.scan(SHELLSPLIT_PATTERN) do |word, sq, dq, esc, garbage, sep| if garbage b = $~.begin(0) line = $~[0] @@ -69,11 +99,11 @@ module Shell # characters when considered special: # # $ ` " \ - field << (word || sq || (dq && dq.gsub(/\\([$`"\\\n])/, '\\1')) || esc.gsub(/\\(.)/, '\\1')) - found_glob_char = word && word =~ /[*?\[]/ # must be unquoted + field << (word || sq || (dq && dq.gsub(DOUBLE_QUOTE_ESCAPES_PATTERN, '\\1')) || esc.gsub(SINGLE_ESCAPE_PATTERN, '\\1')) + found_glob_char = word&.match?(GLOB_PATTERN) # must be unquoted # Expand tildes at the beginning of unquoted words. if word && at_word_start - field.sub!(/^~([^\/]*)/) do + field.sub!(TILDE_PREFIX_PATTERN) do user = Regexp.last_match(1) user.empty? ? Dir.home : Dir.home(user) rescue ArgumentError @@ -84,9 +114,13 @@ module Shell if sep if found_glob_char glob_words = expand_globs(field) - words += (glob_words.empty? ? [field] : glob_words) + if glob_words.empty? + words << SplitWord.new(text: field, globbed: false) + else + glob_words.each { |glob_word| words << SplitWord.new(text: glob_word, globbed: true) } + end else - words << field + words << SplitWord.new(text: field, globbed: false) end field = "".dup at_word_start = true @@ -101,17 +135,79 @@ module Shell end def expand_variables(value) - value.gsub(ENV_VAR_REGEX) do - raw = Regexp.last_match(2) || Regexp.last_match(1) - if (m = DEFAULT_VAR_REGEX.match(raw)) - name = m[1] - fallback = m[2] - env_value = ENV[name] - (env_value.nil? || env_value.empty?) ? expand_variables(fallback) : env_value + output = +"" + i = 0 + while i < value.length + if value[i] != "$" + output << value[i] + i += 1 + next + end + + if value[i + 1] == "{" + raw, i = read_braced_variable(value, i + 2) + output << resolve_braced_variable(raw) + elsif variable_char?(value[i + 1], first: true) + j = i + 2 + j += 1 while j < value.length && variable_char?(value[j], first: false) + output << ENV.fetch(value[(i + 1)...j]) + i = j else - ENV.fetch(raw) + output << "$" + i += 1 end end + output + end + + def read_braced_variable(value, start_index) + output = +"" + depth = 1 + i = start_index + while i < value.length + c = value[i] + if c == "{" + depth += 1 + elsif c == "}" + depth -= 1 + return [output, i + 1] if depth.zero? + end + output << c + i += 1 + end + raise ArgumentError, "Unmatched ${...}" + end + + def resolve_braced_variable(raw) + name, fallback = split_default_expression(raw) + if fallback + env_value = ENV[name] + (env_value.nil? || env_value.empty?) ? expand_variables(fallback) : env_value + else + ENV.fetch(name) + end + end + + def split_default_expression(raw) + depth = 0 + i = 0 + while i < raw.length - 1 + c = raw[i] + if c == "{" + depth += 1 + elsif c == "}" + depth -= 1 if depth > 0 + elsif depth.zero? && c == ":" && raw[i + 1] == "-" + return [raw[0...i], raw[(i + 2)..]] + end + i += 1 + end + [raw, nil] + end + + def variable_char?(char, first:) + return false if char.nil? + first ? VARIABLE_FIRST_CHAR_PATTERN.match?(char) : VARIABLE_CHAR_PATTERN.match?(char) end def expand_command_substitution(line) @@ -260,9 +356,15 @@ module Shell end def run_command_substitution(command) - stdout, status = Open3.capture2("/bin/sh", "-c", command) - raise Errno::ENOENT, command unless status.success? - stdout = stdout.sub(/\n+\z/, "") + stdout, stderr, status = Open3.capture3("/bin/sh", "-c", command) + unless status.success? + reason = status.exitstatus ? "exit #{status.exitstatus}" : "signal #{status.termsig}" + details = stderr.to_s.strip + message = "command substitution failed (#{reason}): #{command}" + message = "#{message}: #{details}" unless details.empty? + raise CommandSubstitutionError, message + end + stdout = stdout.sub(TRAILING_NEWLINES_PATTERN, "") stdout.tr("\n", " ") end @@ -270,9 +372,9 @@ module Shell escaped = value.gsub("$", ESCAPED_DOLLAR) case context when :double_quoted - escaped.gsub(/([\\"])/, '\\\\\1') + escaped.gsub(ESCAPE_DOUBLE_QUOTED_SUBSTITUTION_PATTERN, '\\\\\1') when :unquoted - escaped.gsub(/(\\|["'])/, '\\\\\1') + escaped.gsub(ESCAPE_UNQUOTED_SUBSTITUTION_PATTERN, '\\\\\1') else escaped end @@ -289,20 +391,20 @@ module Shell i = 0 while i < expr.length c = expr[i] - if c.match?(/\s/) + if c.match?(WHITESPACE_PATTERN) i += 1 next end - if c.match?(/\d/) + if c.match?(DIGIT_PATTERN) j = i + 1 - j += 1 while j < expr.length && expr[j].match?(/\d/) + j += 1 while j < expr.length && expr[j].match?(DIGIT_PATTERN) tokens << [:number, expr[i...j].to_i] i = j next end - if c.match?(/[A-Za-z_]/) + if c.match?(ARITHMETIC_IDENTIFIER_FIRST_PATTERN) j = i + 1 - j += 1 while j < expr.length && expr[j].match?(/[A-Za-z0-9_]/) + j += 1 while j < expr.length && expr[j].match?(ARITHMETIC_IDENTIFIER_PATTERN) name = expr[i...j] value = ENV[name] value = (value.nil? || value.empty?) ? 0 : value.to_i @@ -310,7 +412,7 @@ module Shell i = j next end - if c.match?(%r{[+\-*/()%]}) + if c.match?(ARITHMETIC_OPERATOR_PATTERN) tokens << [:op, c] i += 1 next @@ -425,7 +527,7 @@ module Shell def expand_braces(word) # Simple, non-nested brace expansion: pre{a,b}post -> preapost, prebpost - match = word.match(/(.*?)\{([^{}]*)\}(.*)/) + match = word.match(BRACE_EXPANSION_PATTERN) return [word] unless match prefix = match[1] diff --git a/ruby/test/shell_test.rb b/ruby/test/shell_test.rb index f01ff0e..fa8a141 100644 --- a/ruby/test/shell_test.rb +++ b/ruby/test/shell_test.rb @@ -123,6 +123,49 @@ class ShellTest < Minitest::Test assert_equal "hi", `#{A1_PATH} -c 'echo ${A1_UNSET_VAR:-$(echo hi)}'`.chomp end + def test_expands_glob_from_parameter_default_value + File.write("default_glob_a.txt", TRIVIAL_SHELL_SCRIPT) + File.write("default_glob_b.txt", TRIVIAL_SHELL_SCRIPT) + output = `#{A1_PATH} -c 'printf "%s\n" ${A1_UNSET_GLOB_VAR:-default_glob_*.txt}'`.lines.map(&:chomp).sort + assert_equal ["default_glob_a.txt", "default_glob_b.txt"], output + ensure + FileUtils.rm_f("default_glob_a.txt") + FileUtils.rm_f("default_glob_b.txt") + end + + def test_reports_command_substitution_failure_with_status + _stdout, stderr, status = Open3.capture3(A1_PATH, "-c", "echo $(exit 7)") + refute status.success? + assert_match(/command substitution failed/, stderr) + assert_match(/exit 7/, stderr) + refute_match(/No such file or directory/, stderr) + end + + def test_expands_nested_defaults_with_substitution_and_arithmetic + command = 'echo ${A1_OUTER_UNSET:-${A1_MIDDLE_UNSET:-${A1_INNER_UNSET:-$(printf "%s" "calc_$((2+3))")}}}' + assert_equal "calc_5", `#{A1_PATH} -c '#{command}'`.chomp + end + + def test_matches_sh_backslash_parity_before_dollar_and_backticks + [1, 2, 3, 4].each do |count| + command = "printf \"%s\\n\" #{"\\" * count}$HOME" + shell_stdout, _shell_stderr, shell_status = Open3.capture3(A1_PATH, "-c", command) + sh_stdout, _sh_stderr, sh_status = Open3.capture3("/bin/sh", "-c", command) + + assert_equal sh_status.success?, shell_status.success?, "status mismatch for #{command.inspect}" + assert_equal sh_stdout, shell_stdout, "stdout mismatch for #{command.inspect}" + end + + [1, 2, 3, 4].each do |count| + command = "printf \"%s\\n\" #{"\\" * count}`echo hi`" + shell_stdout, _shell_stderr, shell_status = Open3.capture3(A1_PATH, "-c", command) + sh_stdout, _sh_stderr, sh_status = Open3.capture3("/bin/sh", "-c", command) + + assert_equal sh_status.success?, shell_status.success?, "status mismatch for #{command.inspect}" + assert_equal sh_stdout, shell_stdout, "stdout mismatch for #{command.inspect}" + end + end + def test_does_not_expand_escaped_command_substitution_dollar_paren_in_double_quotes assert_equal "$(echo hi)", `#{A1_PATH} -c 'echo "\\$(echo hi)"'`.chomp end