From 4f4e97475b99187bd37b9e2ee3cd2cdce0c01e64 Mon Sep 17 00:00:00 2001 From: Sami Samhuri Date: Sat, 7 Feb 2026 15:18:41 -0800 Subject: [PATCH] [ruby] Modernize Ruby shell parsing and expansion, add C compat test mode (#4) Replace Ruby's old wordexp-like command splitting with a tokenizer and parser that understands ; and && while honoring quotes and nesting. Implement richer expansions for command substitution, arithmetic, parameter defaults (${var:-...}), brace expansion, and escaped dollar/backtick behavior via shared quote-state handling. Expand the test suite with parser/expansion edge cases, escaping parity checks, builtin usage validation, and job-control refresh tests. Keep C green by adding a compat test profile for c/Makefile test and by returning nonzero on builtin failures in -c mode, including clearer `bg` usage output. --- c/Makefile | 2 +- c/builtins.c | 4 +- c/main.c | 33 ++- ruby/Gemfile | 2 +- ruby/Gemfile.lock | 25 +- ruby/shell/builtins.rb | 29 +- ruby/shell/cli.rb | 4 +- ruby/shell/job_control.rb | 11 +- ruby/shell/quote_cursor.rb | 64 +++++ ruby/shell/repl.rb | 41 ++- ruby/shell/string_parser.rb | 365 +++++++++++++++++++++++++ ruby/shell/word_expander.rb | 523 ++++++++++++++++++++++++++++++++++-- ruby/test/shell_test.rb | 228 +++++++++++++++- 13 files changed, 1260 insertions(+), 71 deletions(-) create mode 100644 ruby/shell/quote_cursor.rb create mode 100644 ruby/shell/string_parser.rb diff --git a/c/Makefile b/c/Makefile index fd8d3f8..ef7dcd1 100644 --- a/c/Makefile +++ b/c/Makefile @@ -10,7 +10,7 @@ a1: $(OBJS) $(CC) $(CFLAGS) -o a1 $(OBJS) $(LDFLAGS) -lreadline -lhistory -ltermcap test: a1 - cd ../ruby && A1_PATH=../c/a1 bundle exec rake test + cd ../ruby && A1_PATH=../c/a1 A1_TEST_PROFILE=compat bundle exec rake test clean: rm -rf $(OBJS) a1 diff --git a/c/builtins.c b/c/builtins.c index d6e0224..2c23da6 100644 --- a/c/builtins.c +++ b/c/builtins.c @@ -26,8 +26,8 @@ int builtin_bg(int argc, char **argv) { if (argc < 2) { - fprintf(stderr, "bg: usage 'bg '\n"); - fprintf(stderr, " runs in the background\n"); + fprintf(stderr, "Usage: bg \n"); + fprintf(stderr, "Runs in the background\n"); return -1; } diff --git a/c/main.c b/c/main.c index f726f16..d877374 100644 --- a/c/main.c +++ b/c/main.c @@ -155,6 +155,8 @@ int handle_wordexp_result(int result, char *cmd) { int process_command(char *line, options_t options) { wordexp_t words; + int builtin_result = 0; + bool builtin_executed = false; int result = wordexp(line, &words, WRDE_SHOWERR | WRDE_UNDEF); if (handle_wordexp_result(result, line) && words.we_wordc > 0) { if (options->verbose) { @@ -165,18 +167,26 @@ int process_command(char *line, options_t options) { fprintf(stderr, "}\n"); } /* try the built-in commands */ - if (cmd_matches("bg", words.we_wordv[0])) - builtin_bg(words.we_wordc, words.we_wordv); - else if (cmd_matches("bgkill", words.we_wordv[0])) - builtin_bgkill(words.we_wordc, words.we_wordv); - else if (cmd_matches("bglist", words.we_wordv[0])) - builtin_bglist(); - else if (cmd_matches("cd", words.we_wordv[0])) - builtin_cd(words.we_wordc, words.we_wordv); - else if (cmd_matches("clear", words.we_wordv[0])) + if (cmd_matches("bg", words.we_wordv[0])) { + builtin_result = builtin_bg(words.we_wordc, words.we_wordv); + builtin_executed = true; + } else if (cmd_matches("bgkill", words.we_wordv[0])) { + builtin_result = builtin_bgkill(words.we_wordc, words.we_wordv); + builtin_executed = true; + } else if (cmd_matches("bglist", words.we_wordv[0])) { + builtin_result = builtin_bglist(); + builtin_executed = true; + } else if (cmd_matches("cd", words.we_wordv[0])) { + builtin_result = builtin_cd(words.we_wordc, words.we_wordv); + builtin_executed = true; + } else if (cmd_matches("clear", words.we_wordv[0])) { builtin_clear(); - else if (cmd_matches("pwd", words.we_wordv[0])) + builtin_executed = true; + } else if (cmd_matches("pwd", words.we_wordv[0])) { builtin_pwd(); + builtin_executed = true; + builtin_result = 0; + } else if (cmd_matches("exit", words.we_wordv[0])) { exit(0); } else { @@ -189,6 +199,9 @@ int process_command(char *line, options_t options) { } add_history(line); /* add to the readline history */ wordfree(&words); + if (builtin_executed && builtin_result < 0) { + return builtin_result; + } return 0; } else { return -2; diff --git a/ruby/Gemfile b/ruby/Gemfile index b969649..d153555 100644 --- a/ruby/Gemfile +++ b/ruby/Gemfile @@ -6,4 +6,4 @@ gem "minitest", "~> 6.0" gem "parser", "~> 3.3.10" gem "rake", "~> 13.0" gem "reline", "~> 0.6" -gem "standard", "~> 1.52.0", require: false +gem "standard", "~> 1.53.0", require: false diff --git a/ruby/Gemfile.lock b/ruby/Gemfile.lock index 88cc954..bc63470 100644 --- a/ruby/Gemfile.lock +++ b/ruby/Gemfile.lock @@ -7,14 +7,13 @@ GEM ffi (1.17.3-x86_64-linux-gnu) formatador (1.2.3) reline - guard (2.19.1) + guard (2.20.1) formatador (>= 0.2.4) listen (>= 2.7, < 4.0) logger (~> 1.6) lumberjack (>= 1.0.12, < 2.0) nenv (~> 0.1) notiffany (~> 0.0) - ostruct (~> 0.6) pry (>= 0.13.0) shellany (~> 0.0) thor (>= 0.18.1) @@ -22,10 +21,11 @@ GEM guard rake io-console (0.8.2) - json (2.18.0) + json (2.18.1) language_server-protocol (3.17.0.5) lint_roller (1.1.0) - listen (3.9.0) + listen (3.10.0) + logger rb-fsevent (~> 0.10, >= 0.10.3) rb-inotify (~> 0.9, >= 0.9.10) logger (1.7.0) @@ -37,12 +37,11 @@ GEM notiffany (0.1.3) nenv (~> 0.1) shellany (~> 0.0) - ostruct (0.6.3) parallel (1.27.0) - parser (3.3.10.0) + parser (3.3.10.1) ast (~> 2.4.1) racc - prism (1.7.0) + prism (1.9.0) pry (0.16.0) coderay (~> 1.1) method_source (~> 1.0) @@ -56,7 +55,7 @@ GEM regexp_parser (2.11.3) reline (0.6.3) io-console (~> 0.5) - rubocop (1.81.7) + rubocop (1.82.1) json (~> 2.3) language_server-protocol (~> 3.17.0.2) lint_roller (~> 1.1.0) @@ -64,7 +63,7 @@ GEM parser (>= 3.3.0.2) rainbow (>= 2.2.2, < 4.0) regexp_parser (>= 2.9.3, < 3.0) - rubocop-ast (>= 1.47.1, < 2.0) + rubocop-ast (>= 1.48.0, < 2.0) ruby-progressbar (~> 1.7) unicode-display_width (>= 2.4.0, < 4.0) rubocop-ast (1.49.0) @@ -76,10 +75,10 @@ GEM rubocop-ast (>= 1.47.1, < 2.0) ruby-progressbar (1.13.0) shellany (0.0.1) - standard (1.52.0) + standard (1.53.0) language_server-protocol (~> 3.17.0.2) lint_roller (~> 1.0) - rubocop (~> 1.81.7) + rubocop (~> 1.82.0) standard-custom (~> 1.0.0) standard-performance (~> 1.8) standard-custom (1.0.2) @@ -88,7 +87,7 @@ GEM standard-performance (1.9.0) lint_roller (~> 1.1) rubocop-performance (~> 1.26.0) - thor (1.4.0) + thor (1.5.0) unicode-display_width (3.2.0) unicode-emoji (~> 4.1) unicode-emoji (4.2.0) @@ -107,7 +106,7 @@ DEPENDENCIES parser (~> 3.3.10) rake (~> 13.0) reline (~> 0.6) - standard (~> 1.52.0) + standard (~> 1.53.0) BUNDLED WITH 4.0.3 diff --git a/ruby/shell/builtins.rb b/ruby/shell/builtins.rb index 4a39cd9..3479399 100644 --- a/ruby/shell/builtins.rb +++ b/ruby/shell/builtins.rb @@ -3,6 +3,8 @@ require "shell/logger" module Shell class Builtins + EXPORT_VARIABLE_PATTERN = /\$\w+/ + attr_reader :job_control, :logger def initialize(job_control: nil, logger: nil) @@ -24,6 +26,11 @@ module Shell ################# def builtin_bg(args) + if args.empty? + logger.warn "Usage: bg " + return -1 + end + cmd = args.shift job_control.exec_command(cmd, args, background: true) end @@ -51,17 +58,35 @@ module Shell end def builtin_cd(args) - Dir.chdir args.first + dir = args.first + oldpwd = Dir.pwd + target = case dir + in nil + Dir.home + in "-" + ENV["OLDPWD"] || oldpwd + else + dir + end + Dir.chdir target + ENV["OLDPWD"] = oldpwd + ENV["PWD"] = Dir.pwd 0 end def builtin_export(args) + if args.count != 1 || args.first.nil? || !args.first.include?("=") + logger.warn "Usage: export NAME=value" + return -1 + end + # only supports one variable and doesn't support quoting name, *value_parts = args.first.strip.split("=") if name.nil? || name.empty? logger.warn "#{red("[ERROR]")} Invalid export command" + return -1 else - ENV[name] = value_parts.join("=").gsub(/\$\w+/) { |m| ENV[m[1..]] || "" } + ENV[name] = value_parts.join("=").gsub(EXPORT_VARIABLE_PATTERN) { ENV[it[1..]] || "" } end 0 end diff --git a/ruby/shell/cli.rb b/ruby/shell/cli.rb index dde9cf5..e9deebc 100644 --- a/ruby/shell/cli.rb +++ b/ruby/shell/cli.rb @@ -21,7 +21,9 @@ module Shell if options[:command] logger.verbose "Executing command: #{options[:command]}" print_logs - exit repl.process_command(options[:command]) + status = repl.process_command(options[:command]) + print_logs + exit status elsif $stdin.isatty repl.start(options: options) end diff --git a/ruby/shell/job_control.rb b/ruby/shell/job_control.rb index f116895..156d79f 100644 --- a/ruby/shell/job_control.rb +++ b/ruby/shell/job_control.rb @@ -10,9 +10,10 @@ module Shell attr_reader :logger - def initialize(logger: nil) + def initialize(logger: nil, refresh_line: nil) @jobs_by_pid = {} @logger = logger || Logger.instance + @refresh_line = refresh_line || -> { Readline.refresh_line } end def exec_command(cmd, args, background: false) @@ -67,7 +68,11 @@ module Shell def trap_sigchld # handler for SIGCHLD when a child's state changes Signal.trap("CHLD") do |_signo| - pid = Process.waitpid(-1, Process::WNOHANG) + pid = begin + Process.waitpid(-1, Process::WNOHANG) + rescue Errno::ECHILD + nil + end if pid.nil? # no-op elsif (job = @jobs_by_pid[pid]) @@ -79,7 +84,7 @@ module Shell else warn "\n#{yellow("[WARN]")} No job found for child with PID #{pid}" end - Readline.refresh_line + @refresh_line.call end end diff --git a/ruby/shell/quote_cursor.rb b/ruby/shell/quote_cursor.rb new file mode 100644 index 0000000..320578c --- /dev/null +++ b/ruby/shell/quote_cursor.rb @@ -0,0 +1,64 @@ +module Shell + # Shared quote/escape state machine for parsers that walk shell-like strings. + class QuoteCursor + attr_reader :state + + def initialize(state: :unquoted) + @state = state + end + + def unquoted? + state == :unquoted + end + + # Consumes one logical unit from line[index], which may be one character + # or an escape pair (e.g., \" or \\$), and updates internal quote state. + # Returns [segment, next_index]. + def consume(line, index) + c = line[index] + + case state + when :unquoted + consume_unquoted(line, index, c) + when :single_quoted + consume_single_quoted(index, c) + when :double_quoted + consume_double_quoted(line, index, c) + else + raise "Unknown state #{state}" + end + end + + private + + def consume_unquoted(line, index, c) + case c + when "'" + @state = :single_quoted + when "\"" + @state = :double_quoted + when "\\" + if index + 1 < line.length + return [line[index, 2], index + 2] + end + end + [c, index + 1] + end + + def consume_single_quoted(index, c) + @state = :unquoted if c == "'" + [c, index + 1] + end + + def consume_double_quoted(line, index, c) + if c == "\\" + if index + 1 < line.length + return [line[index, 2], index + 2] + end + elsif c == "\"" + @state = :unquoted + end + [c, index + 1] + end + end +end diff --git a/ruby/shell/repl.rb b/ruby/shell/repl.rb index 94bf78c..0263d84 100644 --- a/ruby/shell/repl.rb +++ b/ruby/shell/repl.rb @@ -8,6 +8,7 @@ require "shell/builtins" require "shell/colours" require "shell/job_control" require "shell/logger" +require "shell/string_parser" require "shell/word_expander" module Shell @@ -50,24 +51,38 @@ module Shell return 0 if line.strip.empty? # no input, no-op logger.verbose "Processing command: #{line.inspect}" - args = word_expander.expand(line) - cmd = args.shift - logger.verbose "Parsed command: #{cmd} #{args.inspect}" - if builtins.builtin?(cmd) - logger.verbose "Executing builtin #{cmd}" - builtins.exec(cmd, args) - else - logger.verbose "Shelling out for #{cmd}" - job_control.exec_command(cmd, args) + commands = parse_line(line) + result = 0 + commands.each do |entry| + case entry + in StringParser::Command[text:, op:] + command = text + next if command.strip.empty? + next if op == :and && result != 0 + + args = word_expander.expand(command) + program = args.shift + logger.verbose "Parsed command: #{program} #{args.inspect}" + if builtins.builtin?(program) + logger.verbose "Executing builtin #{program}" + result = builtins.exec(program, args) + else + logger.verbose "Shelling out for #{program}" + result = job_control.exec_command(program, args) + end + else + raise ArgumentError, "Unknown parsed command node: #{entry.inspect}" + end end - rescue Errno => e + result + rescue => e warn "#{red("[ERROR]")} #{e.message}" -1 end # Looks like this: /path/to/somewhere% - def prompt(pwd) - "#{blue(pwd)}#{white("%")} #{CLEAR}" - end + def prompt(pwd) = "#{blue(pwd)}#{white("%")} #{CLEAR}" + + def parse_line(line) = StringParser.split_commands(line) end end diff --git a/ruby/shell/string_parser.rb b/ruby/shell/string_parser.rb new file mode 100644 index 0000000..6030445 --- /dev/null +++ b/ruby/shell/string_parser.rb @@ -0,0 +1,365 @@ +module Shell + class StringParser + Command = Data.define(:text, :op) + Token = Data.define(:type, :value) + + class Scanner + def initialize(line, index: 0) + @line = line + @index = index + end + + attr_reader :index + + def tokenize_command_list + tokens = [] + segment_start = index + + until eof? + c = current_char + if c == ";" + tokens << Token.new(type: :text, value: @line[segment_start...index]) + tokens << Token.new(type: :separator, value: :always) + advance + segment_start = index + next + end + + if c == "&" && peek(1) == "&" + tokens << Token.new(type: :text, value: @line[segment_start...index]) + tokens << Token.new(type: :separator, value: :and) + advance(2) + segment_start = index + next + end + + case c + when "\\" + advance_escape + when "'" + skip_single_quoted + when "\"" + skip_double_quoted + when "`" + skip_backtick + when "$" + if peek(1) == "(" + if peek(2) == "(" + skip_arithmetic_substitution + else + skip_command_substitution + end + else + advance + end + else + advance + end + end + + tokens << Token.new(type: :text, value: @line[segment_start...index]) + tokens + end + + # Reads the contents and end-index for a command substitution body, where + # index points to the first character after "$(". + def read_dollar_paren_body + output = +"" + depth = 1 + + until eof? + c = current_char + + if c == "\\" + output << read_escape + next + end + + if c == "'" + output << read_single_quoted + next + end + + if c == "\"" + output << read_double_quoted + next + end + + if c == "`" + output << read_backtick + next + end + + if c == "$" && peek(1) == "(" + if peek(2) == "(" + output << read_arithmetic_substitution + else + output << "$(" + advance(2) + depth += 1 + end + next + end + + if c == "(" + output << c + depth += 1 + advance + next + end + + if c == ")" + depth -= 1 + if depth.zero? + return [output, index + 1] + end + output << c + advance + next + end + + output << c + advance + end + + raise ArgumentError, "Unmatched $(...)" + end + + private + + def eof? + index >= @line.length + end + + def current_char + @line[index] + end + + def peek(offset) + @line[index + offset] + end + + def advance(count = 1) + @index += count + end + + def advance_escape + advance + advance unless eof? + end + + def skip_single_quoted + advance # opening quote + until eof? + if current_char == "'" + advance + return + end + advance + end + raise ArgumentError, "Unmatched quote" + end + + def skip_double_quoted + advance # opening quote + until eof? + c = current_char + case c + when "\\" + advance_escape + when "\"" + advance + return + when "`" + skip_backtick + when "$" + if peek(1) == "(" + if peek(2) == "(" + skip_arithmetic_substitution + else + skip_command_substitution + end + else + advance + end + else + advance + end + end + raise ArgumentError, "Unmatched quote" + end + + def skip_backtick + advance # opening backtick + until eof? + c = current_char + case c + when "\\" + advance_escape + when "`" + advance + return + when "$" + if peek(1) == "(" + if peek(2) == "(" + skip_arithmetic_substitution + else + skip_command_substitution + end + else + advance + end + else + advance + end + end + raise ArgumentError, "Unmatched backtick" + end + + def skip_command_substitution + advance(2) # consume "$(" + depth = 1 + + until eof? + c = current_char + case c + when "\\" + advance_escape + when "'" + skip_single_quoted + when "\"" + skip_double_quoted + when "`" + skip_backtick + when "$" + if peek(1) == "(" + if peek(2) == "(" + skip_arithmetic_substitution + else + advance(2) + depth += 1 + end + else + advance + end + when "(" + advance + depth += 1 + when ")" + advance + depth -= 1 + return if depth.zero? + else + advance + end + end + + raise ArgumentError, "Unmatched $(...)" + end + + def skip_arithmetic_substitution + advance(3) # consume "$((" + depth = 1 + + until eof? + c = current_char + case c + when "\\" + advance_escape + when "'" + skip_single_quoted + when "\"" + skip_double_quoted + when "`" + skip_backtick + when "$" + if peek(1) == "(" + if peek(2) == "(" + advance(3) + depth += 1 + else + skip_command_substitution + end + else + advance + end + when ")" + if peek(1) == ")" + advance(2) + depth -= 1 + return if depth.zero? + else + advance + end + else + advance + end + end + + raise ArgumentError, "Unmatched $((...))" + end + + def read_escape + start = index + advance_escape + @line[start...index] + end + + def read_single_quoted + start = index + skip_single_quoted + @line[start...index] + end + + def read_double_quoted + start = index + skip_double_quoted + @line[start...index] + end + + def read_backtick + start = index + skip_backtick + @line[start...index] + end + + def read_arithmetic_substitution + start = index + skip_arithmetic_substitution + @line[start...index] + end + end + + class << self + def split_commands(line) + commands = [] + next_op = :always + tokens = Scanner.new(line).tokenize_command_list + + tokens.each do |token| + case token + in Token[type: :text, value:] + if next_op == :and && value.strip.empty? + raise ArgumentError, "syntax error: expected command after `&&`" + end + commands << Command.new(text: value, op: next_op) + next_op = :always + in Token[type: :separator, value: :and] + if commands.empty? || commands.last.text.strip.empty? + raise ArgumentError, "syntax error near unexpected token `&&`" + end + next_op = :and + in Token[type: :separator, value: :always] + next_op = :always + else + raise ArgumentError, "Unknown token type: #{token.type}" + end + end + + commands + end + + def read_dollar_paren(line, start_index) = Scanner.new(line, index: start_index).read_dollar_paren_body + end + end +end diff --git a/ruby/shell/word_expander.rb b/ruby/shell/word_expander.rb index aaf30a3..2eef5a8 100644 --- a/ruby/shell/word_expander.rb +++ b/ruby/shell/word_expander.rb @@ -1,8 +1,29 @@ -require "shellwords" +require "open3" +require "shell/quote_cursor" +require "shell/string_parser" module Shell class WordExpander - ENV_VAR_REGEX = /\$(?:\{([^}]+)\}|(\w+)\b)/ + ESCAPED_DOLLAR = "\u0001" + ESCAPED_BACKTICK = "\u0002" + GLOB_PATTERN = /[*?\[]/ + SHELLSPLIT_PATTERN = /\G\s*(?>([^\0\s\\'"]+)|'([^\0']*)'|"((?:[^\0"\\]|\\[^\0])*)"|(\\[^\0]?)|(\S))(\s|\z)?/m + DOUBLE_QUOTE_ESCAPES_PATTERN = /\\([$`"\\\n])/ + SINGLE_ESCAPE_PATTERN = /\\(.)/ + TILDE_PREFIX_PATTERN = /^~([^\/]*)/ + VARIABLE_FIRST_CHAR_PATTERN = /[A-Za-z_]/ + VARIABLE_CHAR_PATTERN = /[A-Za-z0-9_]/ + TRAILING_NEWLINES_PATTERN = /\n+\z/ + ESCAPE_DOUBLE_QUOTED_SUBSTITUTION_PATTERN = /([\\"])/ + ESCAPE_UNQUOTED_SUBSTITUTION_PATTERN = /(\\|["'])/ + WHITESPACE_PATTERN = /\s/ + DIGIT_PATTERN = /\d/ + ARITHMETIC_IDENTIFIER_FIRST_PATTERN = /[A-Za-z_]/ + ARITHMETIC_IDENTIFIER_PATTERN = /[A-Za-z0-9_]/ + ARITHMETIC_OPERATOR_PATTERN = %r{[+\-*/()%]} + BRACE_EXPANSION_PATTERN = /(.*?)\{([^{}]*)\}(.*)/ + SplitWord = Data.define(:text, :globbed) + CommandSubstitutionError = Class.new(StandardError) # Splits the given line into multiple words, performing the following transformations: # @@ -11,23 +32,33 @@ module Shell # - Tilde expansion, which means that ~ is expanded to $HOME # - Glob expansion on files and directories def expand(line) - shellsplit(line) - .map do |word| - word - .gsub(ENV_VAR_REGEX) do - name = Regexp.last_match(2) || Regexp.last_match(1) - ENV.fetch(name) - end - # TODO: expand globs + protected_line = protect_escaped_dollars(line) + substituted_line = expand_command_substitution(protected_line) + shellsplit_tokens(substituted_line) + .flat_map do |word| + expanded = expand_variables(word.text) + .tr(ESCAPED_DOLLAR, "$") + .tr(ESCAPED_BACKTICK, "`") + expand_braces(expanded).map { SplitWord.new(text: it, globbed: word.globbed) } + end + .flat_map do |word| + if word.globbed + [word.text] + elsif GLOB_PATTERN.match?(word.text) + glob_words = expand_globs(word.text) + glob_words.empty? ? [word.text] : glob_words + else + [word.text] + end end end - # Lifted directly from Ruby 4.0.0. + # Adapted from Ruby's Shellwords splitting logic. # # Splits a string into an array of tokens in the same way the UNIX # Bourne shell does. # - # argv = Shellwords.split('here are "two words"') + # argv = shellsplit('here are "two words"') # argv #=> ["here", "are", "two words"] # # +line+ must not contain NUL characters because of nature of @@ -37,7 +68,7 @@ module Shell # metacharacters except for the single and double quotes and # backslash are not treated as such. # - # argv = Shellwords.split('ruby my_prog.rb | less') + # argv = shellsplit('ruby my_prog.rb | less') # argv #=> ["ruby", "my_prog.rb", "|", "less"] # # String#shellsplit is a shortcut for this function. @@ -45,12 +76,15 @@ module Shell # argv = 'here are "two words"'.shellsplit # argv #=> ["here", "are", "two words"] def shellsplit(line) + shellsplit_tokens(line).map(&:text) + end + + def shellsplit_tokens(line) words = [] field = "".dup at_word_start = true found_glob_char = false - line.scan(/\G\s*(?>([^\0\s\\'"]+)|'([^\0']*)'|"((?:[^\0"\\]|\\[^\0])*)"|(\\[^\0]?)|(\S))(\s|\z)?/m) do - |word, sq, dq, esc, garbage, sep| + line.scan(SHELLSPLIT_PATTERN) do |word, sq, dq, esc, garbage, sep| if garbage b = $~.begin(0) line = $~[0] @@ -64,19 +98,28 @@ module Shell # characters when considered special: # # $ ` " \ - field << (word || sq || (dq && dq.gsub(/\\([$`"\\\n])/, '\\1')) || esc.gsub(/\\(.)/, '\\1')) - found_glob_char = word && word =~ /[*?\[]/ # must be unquoted + field << (word || sq || (dq && dq.gsub(DOUBLE_QUOTE_ESCAPES_PATTERN, '\\1')) || esc.gsub(SINGLE_ESCAPE_PATTERN, '\\1')) + found_glob_char = word&.match?(GLOB_PATTERN) # must be unquoted # Expand tildes at the beginning of unquoted words. if word && at_word_start - field.sub!(/^~/, Dir.home) + field.sub!(TILDE_PREFIX_PATTERN) do + user = Regexp.last_match(1) + user.empty? ? Dir.home : Dir.home(user) + rescue ArgumentError + "~#{user}" + end end at_word_start = false if sep if found_glob_char glob_words = expand_globs(field) - words += (glob_words.empty? ? [field] : glob_words) + if glob_words.empty? + words << SplitWord.new(text: field, globbed: false) + else + glob_words.each { words << SplitWord.new(text: it, globbed: true) } + end else - words << field + words << SplitWord.new(text: field, globbed: false) end field = "".dup at_word_start = true @@ -89,5 +132,445 @@ module Shell def expand_globs(word) Dir.glob(word) end + + def expand_variables(value) + output = +"" + i = 0 + while i < value.length + if value[i] != "$" + output << value[i] + i += 1 + next + end + + if value[i + 1] == "{" + raw, i = read_braced_variable(value, i + 2) + output << resolve_braced_variable(raw) + elsif variable_char?(value[i + 1], first: true) + j = i + 2 + j += 1 while j < value.length && variable_char?(value[j], first: false) + output << ENV.fetch(value[(i + 1)...j]) + i = j + else + output << "$" + i += 1 + end + end + output + end + + def read_braced_variable(value, start_index) + output = +"" + depth = 1 + i = start_index + while i < value.length + c = value[i] + if c == "{" + depth += 1 + elsif c == "}" + depth -= 1 + return [output, i + 1] if depth.zero? + end + output << c + i += 1 + end + raise ArgumentError, "Unmatched ${...}" + end + + def resolve_braced_variable(raw) + name, fallback = split_default_expression(raw) + if fallback + env_value = ENV[name] + (env_value.nil? || env_value.empty?) ? expand_variables(fallback) : env_value + else + ENV.fetch(name) + end + end + + def split_default_expression(raw) + depth = 0 + i = 0 + while i < raw.length - 1 + c = raw[i] + if c == "{" + depth += 1 + elsif c == "}" + depth -= 1 if depth > 0 + elsif depth.zero? && c == ":" && raw[i + 1] == "-" + return [raw[0...i], raw[(i + 2)..]] + end + i += 1 + end + [raw, nil] + end + + def variable_char?(char, first:) + return false if char.nil? + first ? VARIABLE_FIRST_CHAR_PATTERN.match?(char) : VARIABLE_CHAR_PATTERN.match?(char) + end + + def expand_command_substitution(line) + output = +"" + i = 0 + cursor = QuoteCursor.new + while i < line.length + c = line[i] + if cursor.unquoted? + case c + when "`" + cmd, i = read_backtick(line, i + 1) + output << escape_substitution_output(run_command_substitution(cmd), :unquoted) + when "$" + if line[i + 1] == "(" + if line[i + 2] == "(" + expr, i = read_arithmetic(line, i + 3) + output << expand_arithmetic(expr) + else + cmd, i = read_dollar_paren(line, i + 2) + output << escape_substitution_output(run_command_substitution(cmd), :unquoted) + end + else + output << c + i += 1 + end + when "\\" + if i + 1 < line.length + escaped = line[i + 1] + if escaped == "$" + output << ESCAPED_DOLLAR + i += 2 + elsif escaped == "`" + output << ESCAPED_BACKTICK + i += 2 + else + segment, i = cursor.consume(line, i) + output << segment + end + else + segment, i = cursor.consume(line, i) + output << segment + end + else + segment, i = cursor.consume(line, i) + output << segment + end + + elsif cursor.state == :double_quoted + case c + when "\\" + if i + 1 < line.length + escaped = line[i + 1] + if escaped == "$" || escaped == "`" + output << escaped_replacement(escaped) + else + output << "\\" + output << escaped + end + i += 2 + else + segment, i = cursor.consume(line, i) + output << segment + end + when "`" + cmd, i = read_backtick(line, i + 1) + output << escape_substitution_output(run_command_substitution(cmd), :double_quoted) + when "$" + if line[i + 1] == "(" + if line[i + 2] == "(" + expr, i = read_arithmetic(line, i + 3) + output << expand_arithmetic(expr) + else + cmd, i = read_dollar_paren(line, i + 2) + output << escape_substitution_output(run_command_substitution(cmd), :double_quoted) + end + else + segment, i = cursor.consume(line, i) + output << segment + end + else + segment, i = cursor.consume(line, i) + output << segment + end + + else + segment, i = cursor.consume(line, i) + output << segment + end + end + output + end + + def read_backtick(line, start_index) + output = +"" + i = start_index + while i < line.length + c = line[i] + if c == "`" + return [output, i + 1] + end + if c == "\\" + if i + 1 < line.length + output << line[i + 1] + i += 2 + next + end + end + output << c + i += 1 + end + raise ArgumentError, "Unmatched backtick" + end + + def read_dollar_paren(line, start_index) + StringParser.read_dollar_paren(line, start_index) + end + + def read_arithmetic(line, start_index) + output = +"" + i = start_index + depth = 1 + while i < line.length + c = line[i] + if c == "(" + depth += 1 + output << c + elsif c == ")" + depth -= 1 + if depth.zero? + if line[i + 1] == ")" + return [output, i + 2] + else + depth += 1 + output << c + end + else + output << c + end + else + output << c + end + i += 1 + end + raise ArgumentError, "Unmatched $((...))" + end + + def run_command_substitution(command) + stdout, stderr, status = Open3.capture3("/bin/sh", "-c", command) + unless status.success? + reason = status.exitstatus ? "exit #{status.exitstatus}" : "signal #{status.termsig}" + details = stderr.to_s.strip + message = "command substitution failed (#{reason}): #{command}" + message = "#{message}: #{details}" unless details.empty? + raise CommandSubstitutionError, message + end + stdout = stdout.sub(TRAILING_NEWLINES_PATTERN, "") + stdout.tr("\n", " ") + end + + def escape_substitution_output(value, context) + escaped = value.gsub("$", ESCAPED_DOLLAR) + case context + when :double_quoted + escaped.gsub(ESCAPE_DOUBLE_QUOTED_SUBSTITUTION_PATTERN, '\\\\\1') + when :unquoted + escaped.gsub(ESCAPE_UNQUOTED_SUBSTITUTION_PATTERN, '\\\\\1') + else + escaped + end + end + + def expand_arithmetic(expr) + tokens = tokenize_arithmetic(expr) + rpn = arithmetic_to_rpn(tokens) + evaluate_rpn(rpn).to_s + end + + def tokenize_arithmetic(expr) + tokens = [] + i = 0 + while i < expr.length + c = expr[i] + if c.match?(WHITESPACE_PATTERN) + i += 1 + next + end + if c.match?(DIGIT_PATTERN) + j = i + 1 + j += 1 while j < expr.length && expr[j].match?(DIGIT_PATTERN) + tokens << [:number, expr[i...j].to_i] + i = j + next + end + if c.match?(ARITHMETIC_IDENTIFIER_FIRST_PATTERN) + j = i + 1 + j += 1 while j < expr.length && expr[j].match?(ARITHMETIC_IDENTIFIER_PATTERN) + name = expr[i...j] + value = ENV[name] + value = (value.nil? || value.empty?) ? 0 : value.to_i + tokens << [:number, value] + i = j + next + end + if c.match?(ARITHMETIC_OPERATOR_PATTERN) + tokens << [:op, c] + i += 1 + next + end + raise ArgumentError, "Invalid arithmetic expression: #{expr}" + end + tokens + end + + def arithmetic_to_rpn(tokens) + output = [] + ops = [] + prev_type = nil + tokens.each do |type, value| + if type == :number + output << [:number, value] + prev_type = :number + next + end + + op = value + if op == "(" + ops << op + prev_type = :lparen + next + end + if op == ")" + while (top = ops.pop) + break if top == "(" + output << [:op, top] + end + raise ArgumentError, "Unmatched ) in arithmetic expression" if top != "(" + prev_type = :rparen + next + end + + if op == "-" && (prev_type.nil? || prev_type == :op || prev_type == :lparen) + op = "u-" + elsif op == "+" && (prev_type.nil? || prev_type == :op || prev_type == :lparen) + op = "u+" + end + + while !ops.empty? && precedence(ops.last) >= precedence(op) + output << [:op, ops.pop] + end + ops << op + prev_type = :op + end + + while (top = ops.pop) + raise ArgumentError, "Unmatched ( in arithmetic expression" if top == "(" + output << [:op, top] + end + output + end + + def precedence(op) + case op + when "u+", "u-" + 3 + when "*", "/", "%" + 2 + when "+", "-" + 1 + else + 0 + end + end + + def evaluate_rpn(rpn) + stack = [] + rpn.each do |type, value| + if type == :number + stack << value + next + end + + case value + when "u+" + raise ArgumentError, "Invalid arithmetic expression" if stack.empty? + stack << stack.pop + when "u-" + raise ArgumentError, "Invalid arithmetic expression" if stack.empty? + stack << -stack.pop + else + b = stack.pop + a = stack.pop + raise ArgumentError, "Invalid arithmetic expression" if a.nil? || b.nil? + stack << apply_operator(a, b, value) + end + end + raise ArgumentError, "Invalid arithmetic expression" unless stack.length == 1 + stack[0] + end + + def apply_operator(a, b, op) + case op + when "+" + a + b + when "-" + a - b + when "*" + a * b + when "/" + (b == 0) ? 0 : a / b + when "%" + (b == 0) ? 0 : a % b + else + raise ArgumentError, "Invalid arithmetic expression" + end + end + + def expand_braces(word) + # Simple, non-nested brace expansion: pre{a,b}post -> preapost, prebpost + match = word.match(BRACE_EXPANSION_PATTERN) + return [word] unless match + + prefix = match[1] + body = match[2] + suffix = match[3] + return [word] unless body.include?(",") + + parts = body.split(",", -1) + parts.flat_map { expand_braces(prefix + it + suffix) } + end + + def escaped_replacement(char) + case char + when "$" + ESCAPED_DOLLAR + when "`" + ESCAPED_BACKTICK + else + char + end + end + + def protect_escaped_dollars(line) + output = +"" + i = 0 + while i < line.length + if line.getbyte(i) == "\\".ord + j = i + 1 + j += 1 while j < line.length && line.getbyte(j) == "\\".ord + count = j - i + if j < line.length && line.getbyte(j) == "$".ord && count.odd? + output << ("\\" * (count - 1)) + output << ESCAPED_DOLLAR + i = j + 1 + else + output << ("\\" * count) + i = j + end + else + output << line[i] + i += 1 + end + end + output + end end end diff --git a/ruby/test/shell_test.rb b/ruby/test/shell_test.rb index 59daea6..4f111c1 100644 --- a/ruby/test/shell_test.rb +++ b/ruby/test/shell_test.rb @@ -1,9 +1,16 @@ require "minitest/autorun" +require "etc" +require "open3" +require "timeout" +$LOAD_PATH.unshift(File.expand_path("..", __dir__)) +require_relative "../shell/job_control" +require_relative "../shell/logger" class ShellTest < Minitest::Test TRIVIAL_SHELL_SCRIPT = "#!/bin/sh\ntrue".freeze A1_PATH = ENV.fetch("A1_PATH", "./a1").freeze + COMPAT_PROFILE = ENV["A1_TEST_PROFILE"] == "compat" def setup FileUtils.mkdir_p("test_bin") @@ -17,6 +24,12 @@ class ShellTest < Minitest::Test "#!/bin/sh\necho '#{code}'" end + def requires_extended_shell!(feature) + return unless COMPAT_PROFILE + + skip "requires extended shell feature: #{feature}" + end + def test_expands_environment_variables assert_equal Dir.home, `#{A1_PATH} -c 'echo $HOME'`.chomp assert_equal Dir.home, `#{A1_PATH} -c 'echo ${HOME}'`.chomp @@ -39,6 +52,10 @@ class ShellTest < Minitest::Test assert_equal "a b", `#{A1_PATH} -c 'echo \"a b\"'`.chomp end + def test_respects_escaped_double_quote_in_double_quotes + assert_equal "a\"b", `#{A1_PATH} -c 'echo \"a\\\"b\"'`.chomp + end + def test_respects_single_quotes assert_equal "a b", `#{A1_PATH} -c \"echo 'a b'\"`.chomp end @@ -57,6 +74,189 @@ class ShellTest < Minitest::Test FileUtils.rm_f("globtest_b.txt") end + def test_does_not_reglob_expanded_paths + File.write("globspecial_a.txt", TRIVIAL_SHELL_SCRIPT) + File.write("globspecial_[a].txt", TRIVIAL_SHELL_SCRIPT) + output = `#{A1_PATH} -c 'echo globspecial_*.txt'`.chomp.split + assert_equal ["globspecial_[a].txt", "globspecial_a.txt"], output.sort + ensure + FileUtils.rm_f("globspecial_a.txt") + FileUtils.rm_f("globspecial_[a].txt") + end + + def test_does_not_expand_escaped_dollar + assert_equal "$HOME", `#{A1_PATH} -c 'echo \\$HOME'`.chomp + end + + def test_expands_brace_expansion + requires_extended_shell!("brace expansion") + assert_equal "a b", `#{A1_PATH} -c 'echo {a,b}'`.chomp + end + + def test_expands_command_substitution_backticks + assert_equal "hi", %x(#{A1_PATH} -c 'echo `echo hi`').chomp + end + + def test_expands_command_substitution_dollar_paren + assert_equal "hi", `#{A1_PATH} -c 'echo $(echo hi)'`.chomp + end + + def test_keeps_control_operators_inside_command_substitution + requires_extended_shell!("nested command parsing in substitutions") + semicolon_stdout, semicolon_stderr, semicolon_status = Open3.capture3(A1_PATH, "-c", "echo $(echo hi; echo bye)") + assert semicolon_status.success?, semicolon_stderr + assert_equal "hi bye\n", semicolon_stdout + + and_stdout, and_stderr, and_status = Open3.capture3(A1_PATH, "-c", "echo $(echo hi && echo bye)") + assert and_status.success?, and_stderr + assert_equal "hi bye\n", and_stdout + end + + def test_expands_command_substitution_with_escaped_quote + requires_extended_shell!("escaped quote handling in substitutions") + assert_equal "a\"b", `#{A1_PATH} -c 'echo $(printf \"%s\" \"a\\\"b\")'`.chomp + end + + def test_expands_arithmetic + assert_equal "3", `#{A1_PATH} -c 'echo $((1 + 2))'`.chomp + end + + def test_expands_arithmetic_with_variables + requires_extended_shell!("arithmetic variable lookup") + assert_equal "3", `A1_NUM=2 #{A1_PATH} -c 'echo $((A1_NUM + 1))'`.chomp + end + + def test_expands_tilde_user + user = Etc.getlogin + skip "no login user" unless user + assert_equal Dir.home(user), `#{A1_PATH} -c 'echo ~#{user}'`.chomp + end + + def test_expands_parameter_default_value + requires_extended_shell!("${var:-fallback}") + assert_equal "fallback", `#{A1_PATH} -c 'echo ${A1_UNSET_VAR:-fallback}'`.chomp + end + + def test_expands_parameter_default_value_with_variable_reference + requires_extended_shell!("${var:-$OTHER}") + assert_equal Dir.home, `#{A1_PATH} -c 'echo ${A1_UNSET_VAR:-$HOME}'`.chomp + end + + def test_expands_parameter_default_value_with_command_substitution + requires_extended_shell!("${var:-$(...)}") + assert_equal "hi", `#{A1_PATH} -c 'echo ${A1_UNSET_VAR:-$(echo hi)}'`.chomp + end + + def test_expands_glob_from_parameter_default_value + requires_extended_shell!("glob expansion from parameter defaults") + File.write("default_glob_a.txt", TRIVIAL_SHELL_SCRIPT) + File.write("default_glob_b.txt", TRIVIAL_SHELL_SCRIPT) + output = `#{A1_PATH} -c 'printf "%s\n" ${A1_UNSET_GLOB_VAR:-default_glob_*.txt}'`.lines.map(&:chomp).sort + assert_equal ["default_glob_a.txt", "default_glob_b.txt"], output + ensure + FileUtils.rm_f("default_glob_a.txt") + FileUtils.rm_f("default_glob_b.txt") + end + + def test_reports_command_substitution_failure_with_status + requires_extended_shell!("command substitution error propagation") + _stdout, stderr, status = Open3.capture3(A1_PATH, "-c", "echo $(exit 7)") + refute status.success? + assert_match(/command substitution failed/, stderr) + assert_match(/exit 7/, stderr) + refute_match(/No such file or directory/, stderr) + end + + def test_expands_nested_defaults_with_substitution_and_arithmetic + requires_extended_shell!("nested defaults and arithmetic") + command = 'echo ${A1_OUTER_UNSET:-${A1_MIDDLE_UNSET:-${A1_INNER_UNSET:-$(printf "%s" "calc_$((2+3))")}}}' + assert_equal "calc_5", `#{A1_PATH} -c '#{command}'`.chomp + end + + def test_matches_sh_backslash_parity_before_dollar_and_backticks + [1, 2, 3, 4].each do |count| + command = "printf \"%s\\n\" #{"\\" * count}$HOME" + shell_stdout, _shell_stderr, shell_status = Open3.capture3(A1_PATH, "-c", command) + sh_stdout, _sh_stderr, sh_status = Open3.capture3("/bin/sh", "-c", command) + + assert_equal sh_status.success?, shell_status.success?, "status mismatch for #{command.inspect}" + assert_equal sh_stdout, shell_stdout, "stdout mismatch for #{command.inspect}" + end + + [1, 2, 3, 4].each do |count| + command = "printf \"%s\\n\" #{"\\" * count}`echo hi`" + shell_stdout, _shell_stderr, shell_status = Open3.capture3(A1_PATH, "-c", command) + sh_stdout, _sh_stderr, sh_status = Open3.capture3("/bin/sh", "-c", command) + + assert_equal sh_status.success?, shell_status.success?, "status mismatch for #{command.inspect}" + assert_equal sh_stdout, shell_stdout, "stdout mismatch for #{command.inspect}" + end + end + + def test_does_not_expand_escaped_command_substitution_dollar_paren_in_double_quotes + assert_equal "$(echo hi)", `#{A1_PATH} -c 'echo "\\$(echo hi)"'`.chomp + end + + def test_does_not_expand_escaped_command_substitution_backticks_in_double_quotes + assert_equal "`echo hi`", %x(#{A1_PATH} -c 'echo "\\`echo hi\\`"').chomp + end + + def test_combines_expansions_in_defaults_and_subcommands + requires_extended_shell!("composed substitutions and defaults") + File.write("combo_a.txt", TRIVIAL_SHELL_SCRIPT) + File.write("combo_b.txt", TRIVIAL_SHELL_SCRIPT) + + command = [ + "printf \"<%s>\\n\"", + "${A1_UNSET_COMPLEX_TEST_VAR:-$(printf \"%s\" \"default_$((1+2))\")}", + "$(printf \"%s\" \"combo_*.txt\")", + "\"$(printf \"%s\" \"quoted value\")\"", + "{left,right}", + "~" + ].join(" ") + output = `#{A1_PATH} -c '#{command}'`.lines.map(&:chomp) + + assert_equal "", output[0] + assert_equal ["", ""], output[1, 2].sort + assert_equal "", output[3] + assert_equal "", output[4] + assert_equal "", output[5] + assert_equal "<#{Dir.home}>", output[6] + assert_equal 7, output.length + ensure + FileUtils.rm_f("combo_a.txt") + FileUtils.rm_f("combo_b.txt") + end + + def test_reports_parse_errors_without_ruby_backtrace + _stdout, stderr, status = Open3.capture3(A1_PATH, "-c", "echo \"unterminated") + refute status.success? + refute_match(/\.rb:\d+:in /, stderr) + end + + def test_export_without_args_does_not_raise_nomethoderror + _stdout, stderr, status = Open3.capture3(A1_PATH, "-c", "export") + refute status.success? + refute_match(/NoMethodError|undefined method/, stderr) + end + + def test_bg_without_command_reports_usage_error + _stdout, stderr, status = Open3.capture3(A1_PATH, "-c", "bg") + refute status.success? + assert_match(/Usage: bg /, stderr) + end + + def test_rejects_empty_command_around_and_operator + requires_extended_shell!("top-level && parsing") + _stdout1, stderr1, status1 = Open3.capture3(A1_PATH, "-c", "&& echo hi") + refute status1.success? + assert_match(/syntax/i, stderr1) + + _stdout2, stderr2, status2 = Open3.capture3(A1_PATH, "-c", "echo hi &&") + refute status2.success? + assert_match(/syntax/i, stderr2) + end + ################################# ### Execution and job control ### ################################# @@ -100,7 +300,21 @@ class ShellTest < Minitest::Test end def test_refreshes_readline_after_bg_execution - skip "unimplemented" + called = false + job_control = Shell::JobControl.new( + logger: Shell::Logger.instance, + refresh_line: -> { called = true } + ) + previous = job_control.trap_sigchld + begin + job_control.exec_command("echo", ["hello"], background: true) + Timeout.timeout(2) do + sleep 0.01 until called + end + assert called + ensure + Signal.trap("CHLD", previous) + end end ######################### @@ -108,19 +322,23 @@ class ShellTest < Minitest::Test ######################### def test_builtin_cd_no_args - skip "cannot easily implement without sequencing with ; or &&" + requires_extended_shell!("multi-command sequencing with ;") + assert_equal Dir.home, `#{A1_PATH} -c 'cd; echo $PWD'`.strip end def test_builtin_cd - skip "cannot easily implement without sequencing with ; or &&" + requires_extended_shell!("multi-command sequencing with ;") + assert_equal File.join(Dir.pwd, "blah"), `#{A1_PATH} -c 'mkdir -p blah; cd blah; echo $PWD; cd ..; rm -rf blah'`.strip end def test_builtin_cd_dash - skip "cannot easily implement without sequencing with ; or &&" + requires_extended_shell!("multi-command sequencing with ;") + assert_equal Dir.pwd, `#{A1_PATH} -c 'mkdir -p blah; cd blah; cd -; rm -rf blah; echo $PWD'`.strip end def test_builtin_cd_parent - skip "cannot easily implement without sequencing with ; or &&" + requires_extended_shell!("multi-command sequencing with ;") + assert_equal Dir.pwd, `#{A1_PATH} -c 'mkdir -p blah; cd blah; cd ..; rm -rf blah; echo $PWD'`.strip end def test_builtin_pwd