[ruby] Modernize Ruby shell parsing and expansion, add C compat test mode (#4)

Replace Ruby's old wordexp-like command splitting with a tokenizer and
parser that understands ; and && while honoring quotes and nesting.

Implement richer expansions for command substitution, arithmetic,
parameter defaults (${var:-...}), brace expansion, and escaped
dollar/backtick behavior via shared quote-state handling.

Expand the test suite with parser/expansion edge cases, escaping
parity checks, builtin usage validation, and job-control refresh tests.

Keep C green by adding a compat test profile for c/Makefile test and
by returning nonzero on builtin failures in -c mode, including clearer
`bg` usage output.
This commit is contained in:
Sami Samhuri 2026-02-07 15:18:41 -08:00 committed by GitHub
parent 79ba26c76b
commit 4f4e97475b
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
13 changed files with 1260 additions and 71 deletions

View file

@ -10,7 +10,7 @@ a1: $(OBJS)
$(CC) $(CFLAGS) -o a1 $(OBJS) $(LDFLAGS) -lreadline -lhistory -ltermcap
test: a1
cd ../ruby && A1_PATH=../c/a1 bundle exec rake test
cd ../ruby && A1_PATH=../c/a1 A1_TEST_PROFILE=compat bundle exec rake test
clean:
rm -rf $(OBJS) a1

View file

@ -26,8 +26,8 @@
int builtin_bg(int argc, char **argv) {
if (argc < 2) {
fprintf(stderr, "bg: usage 'bg <command>'\n");
fprintf(stderr, " runs <command> in the background\n");
fprintf(stderr, "Usage: bg <command>\n");
fprintf(stderr, "Runs <command> in the background\n");
return -1;
}

View file

@ -155,6 +155,8 @@ int handle_wordexp_result(int result, char *cmd) {
int process_command(char *line, options_t options) {
wordexp_t words;
int builtin_result = 0;
bool builtin_executed = false;
int result = wordexp(line, &words, WRDE_SHOWERR | WRDE_UNDEF);
if (handle_wordexp_result(result, line) && words.we_wordc > 0) {
if (options->verbose) {
@ -165,18 +167,26 @@ int process_command(char *line, options_t options) {
fprintf(stderr, "}\n");
}
/* try the built-in commands */
if (cmd_matches("bg", words.we_wordv[0]))
builtin_bg(words.we_wordc, words.we_wordv);
else if (cmd_matches("bgkill", words.we_wordv[0]))
builtin_bgkill(words.we_wordc, words.we_wordv);
else if (cmd_matches("bglist", words.we_wordv[0]))
builtin_bglist();
else if (cmd_matches("cd", words.we_wordv[0]))
builtin_cd(words.we_wordc, words.we_wordv);
else if (cmd_matches("clear", words.we_wordv[0]))
if (cmd_matches("bg", words.we_wordv[0])) {
builtin_result = builtin_bg(words.we_wordc, words.we_wordv);
builtin_executed = true;
} else if (cmd_matches("bgkill", words.we_wordv[0])) {
builtin_result = builtin_bgkill(words.we_wordc, words.we_wordv);
builtin_executed = true;
} else if (cmd_matches("bglist", words.we_wordv[0])) {
builtin_result = builtin_bglist();
builtin_executed = true;
} else if (cmd_matches("cd", words.we_wordv[0])) {
builtin_result = builtin_cd(words.we_wordc, words.we_wordv);
builtin_executed = true;
} else if (cmd_matches("clear", words.we_wordv[0])) {
builtin_clear();
else if (cmd_matches("pwd", words.we_wordv[0]))
builtin_executed = true;
} else if (cmd_matches("pwd", words.we_wordv[0])) {
builtin_pwd();
builtin_executed = true;
builtin_result = 0;
}
else if (cmd_matches("exit", words.we_wordv[0])) {
exit(0);
} else {
@ -189,6 +199,9 @@ int process_command(char *line, options_t options) {
}
add_history(line); /* add to the readline history */
wordfree(&words);
if (builtin_executed && builtin_result < 0) {
return builtin_result;
}
return 0;
} else {
return -2;

View file

@ -6,4 +6,4 @@ gem "minitest", "~> 6.0"
gem "parser", "~> 3.3.10"
gem "rake", "~> 13.0"
gem "reline", "~> 0.6"
gem "standard", "~> 1.52.0", require: false
gem "standard", "~> 1.53.0", require: false

View file

@ -7,14 +7,13 @@ GEM
ffi (1.17.3-x86_64-linux-gnu)
formatador (1.2.3)
reline
guard (2.19.1)
guard (2.20.1)
formatador (>= 0.2.4)
listen (>= 2.7, < 4.0)
logger (~> 1.6)
lumberjack (>= 1.0.12, < 2.0)
nenv (~> 0.1)
notiffany (~> 0.0)
ostruct (~> 0.6)
pry (>= 0.13.0)
shellany (~> 0.0)
thor (>= 0.18.1)
@ -22,10 +21,11 @@ GEM
guard
rake
io-console (0.8.2)
json (2.18.0)
json (2.18.1)
language_server-protocol (3.17.0.5)
lint_roller (1.1.0)
listen (3.9.0)
listen (3.10.0)
logger
rb-fsevent (~> 0.10, >= 0.10.3)
rb-inotify (~> 0.9, >= 0.9.10)
logger (1.7.0)
@ -37,12 +37,11 @@ GEM
notiffany (0.1.3)
nenv (~> 0.1)
shellany (~> 0.0)
ostruct (0.6.3)
parallel (1.27.0)
parser (3.3.10.0)
parser (3.3.10.1)
ast (~> 2.4.1)
racc
prism (1.7.0)
prism (1.9.0)
pry (0.16.0)
coderay (~> 1.1)
method_source (~> 1.0)
@ -56,7 +55,7 @@ GEM
regexp_parser (2.11.3)
reline (0.6.3)
io-console (~> 0.5)
rubocop (1.81.7)
rubocop (1.82.1)
json (~> 2.3)
language_server-protocol (~> 3.17.0.2)
lint_roller (~> 1.1.0)
@ -64,7 +63,7 @@ GEM
parser (>= 3.3.0.2)
rainbow (>= 2.2.2, < 4.0)
regexp_parser (>= 2.9.3, < 3.0)
rubocop-ast (>= 1.47.1, < 2.0)
rubocop-ast (>= 1.48.0, < 2.0)
ruby-progressbar (~> 1.7)
unicode-display_width (>= 2.4.0, < 4.0)
rubocop-ast (1.49.0)
@ -76,10 +75,10 @@ GEM
rubocop-ast (>= 1.47.1, < 2.0)
ruby-progressbar (1.13.0)
shellany (0.0.1)
standard (1.52.0)
standard (1.53.0)
language_server-protocol (~> 3.17.0.2)
lint_roller (~> 1.0)
rubocop (~> 1.81.7)
rubocop (~> 1.82.0)
standard-custom (~> 1.0.0)
standard-performance (~> 1.8)
standard-custom (1.0.2)
@ -88,7 +87,7 @@ GEM
standard-performance (1.9.0)
lint_roller (~> 1.1)
rubocop-performance (~> 1.26.0)
thor (1.4.0)
thor (1.5.0)
unicode-display_width (3.2.0)
unicode-emoji (~> 4.1)
unicode-emoji (4.2.0)
@ -107,7 +106,7 @@ DEPENDENCIES
parser (~> 3.3.10)
rake (~> 13.0)
reline (~> 0.6)
standard (~> 1.52.0)
standard (~> 1.53.0)
BUNDLED WITH
4.0.3

View file

@ -3,6 +3,8 @@ require "shell/logger"
module Shell
class Builtins
EXPORT_VARIABLE_PATTERN = /\$\w+/
attr_reader :job_control, :logger
def initialize(job_control: nil, logger: nil)
@ -24,6 +26,11 @@ module Shell
#################
def builtin_bg(args)
if args.empty?
logger.warn "Usage: bg <command>"
return -1
end
cmd = args.shift
job_control.exec_command(cmd, args, background: true)
end
@ -51,17 +58,35 @@ module Shell
end
def builtin_cd(args)
Dir.chdir args.first
dir = args.first
oldpwd = Dir.pwd
target = case dir
in nil
Dir.home
in "-"
ENV["OLDPWD"] || oldpwd
else
dir
end
Dir.chdir target
ENV["OLDPWD"] = oldpwd
ENV["PWD"] = Dir.pwd
0
end
def builtin_export(args)
if args.count != 1 || args.first.nil? || !args.first.include?("=")
logger.warn "Usage: export NAME=value"
return -1
end
# only supports one variable and doesn't support quoting
name, *value_parts = args.first.strip.split("=")
if name.nil? || name.empty?
logger.warn "#{red("[ERROR]")} Invalid export command"
return -1
else
ENV[name] = value_parts.join("=").gsub(/\$\w+/) { |m| ENV[m[1..]] || "" }
ENV[name] = value_parts.join("=").gsub(EXPORT_VARIABLE_PATTERN) { ENV[it[1..]] || "" }
end
0
end

View file

@ -21,7 +21,9 @@ module Shell
if options[:command]
logger.verbose "Executing command: #{options[:command]}"
print_logs
exit repl.process_command(options[:command])
status = repl.process_command(options[:command])
print_logs
exit status
elsif $stdin.isatty
repl.start(options: options)
end

View file

@ -10,9 +10,10 @@ module Shell
attr_reader :logger
def initialize(logger: nil)
def initialize(logger: nil, refresh_line: nil)
@jobs_by_pid = {}
@logger = logger || Logger.instance
@refresh_line = refresh_line || -> { Readline.refresh_line }
end
def exec_command(cmd, args, background: false)
@ -67,7 +68,11 @@ module Shell
def trap_sigchld
# handler for SIGCHLD when a child's state changes
Signal.trap("CHLD") do |_signo|
pid = Process.waitpid(-1, Process::WNOHANG)
pid = begin
Process.waitpid(-1, Process::WNOHANG)
rescue Errno::ECHILD
nil
end
if pid.nil?
# no-op
elsif (job = @jobs_by_pid[pid])
@ -79,7 +84,7 @@ module Shell
else
warn "\n#{yellow("[WARN]")} No job found for child with PID #{pid}"
end
Readline.refresh_line
@refresh_line.call
end
end

View file

@ -0,0 +1,64 @@
module Shell
  # Quote/escape state machine shared by parsers that step through shell-like
  # strings one logical unit at a time.
  #
  # The state is one of :unquoted, :single_quoted or :double_quoted.
  class QuoteCursor
    attr_reader :state

    # state: the quoting context the cursor starts in.
    def initialize(state: :unquoted)
      @state = state
    end

    # True while the cursor is outside any quotes.
    def unquoted?
      state == :unquoted
    end

    # Consumes the logical unit found at line[index] and updates the quote
    # state. A unit is a single character, or a backslash together with the
    # character after it (outside quotes and inside double quotes only).
    #
    # Returns [segment, next_index].
    # Raises RuntimeError when the cursor holds an unrecognised state.
    def consume(line, index)
      char = line[index]
      if state == :unquoted
        step_unquoted(line, index, char)
      elsif state == :single_quoted
        step_single_quoted(index, char)
      elsif state == :double_quoted
        step_double_quoted(line, index, char)
      else
        raise "Unknown state #{state}"
      end
    end

    private

    # Returns the two-character escape pair starting at index, or nil when the
    # backslash is the last character of the line.
    def escape_pair(line, index)
      [line[index, 2], index + 2] if index + 1 < line.length
    end

    def step_unquoted(line, index, char)
      if char == "\\"
        pair = escape_pair(line, index)
        return pair if pair
      elsif char == "'"
        @state = :single_quoted
      elsif char == "\""
        @state = :double_quoted
      end
      [char, index + 1]
    end

    # Backslashes are ordinary characters inside single quotes; only the
    # closing quote changes state.
    def step_single_quoted(index, char)
      @state = :unquoted if char == "'"
      [char, index + 1]
    end

    def step_double_quoted(line, index, char)
      if char == "\\"
        pair = escape_pair(line, index)
        return pair if pair
      elsif char == "\""
        @state = :unquoted
      end
      [char, index + 1]
    end
  end
end

View file

@ -8,6 +8,7 @@ require "shell/builtins"
require "shell/colours"
require "shell/job_control"
require "shell/logger"
require "shell/string_parser"
require "shell/word_expander"
module Shell
@ -50,24 +51,38 @@ module Shell
return 0 if line.strip.empty? # no input, no-op
logger.verbose "Processing command: #{line.inspect}"
args = word_expander.expand(line)
cmd = args.shift
logger.verbose "Parsed command: #{cmd} #{args.inspect}"
if builtins.builtin?(cmd)
logger.verbose "Executing builtin #{cmd}"
builtins.exec(cmd, args)
commands = parse_line(line)
result = 0
commands.each do |entry|
case entry
in StringParser::Command[text:, op:]
command = text
next if command.strip.empty?
next if op == :and && result != 0
args = word_expander.expand(command)
program = args.shift
logger.verbose "Parsed command: #{program} #{args.inspect}"
if builtins.builtin?(program)
logger.verbose "Executing builtin #{program}"
result = builtins.exec(program, args)
else
logger.verbose "Shelling out for #{cmd}"
job_control.exec_command(cmd, args)
logger.verbose "Shelling out for #{program}"
result = job_control.exec_command(program, args)
end
rescue Errno => e
else
raise ArgumentError, "Unknown parsed command node: #{entry.inspect}"
end
end
result
rescue => e
warn "#{red("[ERROR]")} #{e.message}"
-1
end
# Looks like this: /path/to/somewhere%
def prompt(pwd)
"#{blue(pwd)}#{white("%")} #{CLEAR}"
end
def prompt(pwd) = "#{blue(pwd)}#{white("%")} #{CLEAR}"
def parse_line(line) = StringParser.split_commands(line)
end
end

365
ruby/shell/string_parser.rb Normal file
View file

@ -0,0 +1,365 @@
module Shell
  # Splits a raw command line into a list of commands joined by ";" or "&&",
  # and extracts balanced "$(...)" bodies. Quotes, backslash escapes,
  # backticks, "$(...)" and "$((...))" are skipped as opaque units so that
  # separators inside them are not treated as command boundaries.
  class StringParser
    # One command of a command list. +op+ is :always (run unconditionally) or
    # :and (the command followed an "&&").
    Command = Data.define(:text, :op)
    # Lexer output: +type+ is :text or :separator; for separators +value+ is
    # :always (";") or :and ("&&").
    Token = Data.define(:type, :value)

    # Character-level scanner over a single line.
    class Scanner
      attr_reader :index

      # line:  the string to scan.
      # index: character offset at which scanning starts.
      def initialize(line, index: 0)
        @line = line
        @index = index
      end

      # Splits the line at top-level ";" and "&&".
      #
      # Returns an array of Tokens that alternates text and separator tokens
      # and always ends with a text token (possibly empty).
      # Raises ArgumentError on an unterminated quote, backtick or
      # substitution.
      def tokenize_command_list
        tokens = []
        segment_start = index
        until eof?
          separator, width = separator_at_cursor
          if separator
            tokens << Token.new(type: :text, value: @line[segment_start...index])
            tokens << Token.new(type: :separator, value: separator)
            advance(width)
            segment_start = index
          else
            advance unless skip_construct
          end
        end
        tokens << Token.new(type: :text, value: @line[segment_start...index])
        tokens
      end

      # Reads the contents and end-index for a command substitution body, where
      # index points to the first character after "$(".
      #
      # Returns [body, index_after_closing_paren]. The scanner itself is left
      # positioned on the closing parenthesis.
      # Raises ArgumentError when the substitution is never closed.
      def read_dollar_paren_body
        start = index
        skip_to_closing_paren
        [@line[start...index], index + 1]
      end

      private

      def eof?
        index >= @line.length
      end

      def current_char
        @line[index]
      end

      def peek(offset)
        @line[index + offset]
      end

      def advance(count = 1)
        @index += count
      end

      # Skips a backslash and, when present, the character it escapes.
      def advance_escape
        advance
        advance unless eof?
      end

      # Returns [separator, width] when the cursor sits on ";" or "&&",
      # otherwise nil.
      def separator_at_cursor
        case current_char
        when ";" then [:always, 1]
        when "&" then [:and, 2] if peek(1) == "&"
        end
      end

      # Skips one escape, quoted string, backtick span or "$" construct that
      # starts at the cursor. Returns false, consuming nothing, when the
      # cursor is on any other character.
      def skip_construct
        case current_char
        when "\\" then advance_escape
        when "'" then skip_single_quoted
        when "\"" then skip_double_quoted
        when "`" then skip_backtick
        when "$" then skip_dollar
        else return false
        end
        true
      end

      # Skips "$((...))", "$(...)" or a lone "$".
      def skip_dollar
        if peek(1) != "("
          advance
        elsif peek(2) == "("
          skip_arithmetic_substitution
        else
          skip_command_substitution
        end
      end

      def skip_single_quoted
        advance # opening quote
        until eof?
          closing = current_char == "'"
          advance
          return if closing
        end
        raise ArgumentError, "Unmatched quote"
      end

      # Single quotes are ordinary characters inside double quotes, so only
      # escapes, backticks and "$" constructs are treated specially here.
      def skip_double_quoted
        advance # opening quote
        until eof?
          case current_char
          when "\""
            advance
            return
          when "\\" then advance_escape
          when "`" then skip_backtick
          when "$" then skip_dollar
          else advance
          end
        end
        raise ArgumentError, "Unmatched quote"
      end

      def skip_backtick
        advance # opening backtick
        until eof?
          case current_char
          when "`"
            advance
            return
          when "\\" then advance_escape
          when "$" then skip_dollar
          else advance
          end
        end
        raise ArgumentError, "Unmatched backtick"
      end

      def skip_command_substitution
        advance(2) # consume "$("
        skip_to_closing_paren
        advance # consume ")"
      end

      # Advances to the ")" that balances an already-consumed "(" and stops on
      # it. Plain parentheses nest; nested substitutions and quotes are
      # skipped as units.
      def skip_to_closing_paren
        depth = 1
        until eof?
          case current_char
          when "("
            depth += 1
            advance
          when ")"
            depth -= 1
            return if depth.zero?
            advance
          else
            advance unless skip_construct
          end
        end
        raise ArgumentError, "Unmatched $(...)"
      end

      # Skips a whole "$((...))". Plain parentheses inside the expression are
      # balanced first, so the "))" that ends "(2))" in "$((1 + (2)))" is not
      # mistaken for the terminator. A ")" that neither closes a group nor
      # starts the terminator is skipped as an ordinary character.
      def skip_arithmetic_substitution
        advance(3) # consume "$(("
        open_groups = 0
        until eof?
          case current_char
          when "("
            open_groups += 1
            advance
          when ")"
            if open_groups.positive?
              open_groups -= 1
              advance
            elsif peek(1) == ")"
              advance(2)
              return
            else
              advance
            end
          else
            advance unless skip_construct
          end
        end
        raise ArgumentError, "Unmatched $((...))"
      end
    end

    class << self
      # Splits +line+ into Commands at top-level ";" and "&&".
      #
      # Empty segments around ";" are kept as Commands with blank text; an
      # "&&" with nothing on either side is a syntax error.
      # Raises ArgumentError on syntax errors and unterminated constructs.
      def split_commands(line)
        commands = []
        next_op = :always
        tokens = Scanner.new(line).tokenize_command_list
        tokens.each do |token|
          case token
          in Token[type: :text, value:]
            if next_op == :and && value.strip.empty?
              raise ArgumentError, "syntax error: expected command after `&&`"
            end
            commands << Command.new(text: value, op: next_op)
            next_op = :always
          in Token[type: :separator, value: :and]
            if commands.empty? || commands.last.text.strip.empty?
              raise ArgumentError, "syntax error near unexpected token `&&`"
            end
            next_op = :and
          in Token[type: :separator, value: :always]
            next_op = :always
          else
            raise ArgumentError, "Unknown token type: #{token.type}"
          end
        end
        commands
      end

      # Returns [body, next_index] for the "$(...)" whose body starts at
      # +start_index+ (the first character after "$(").
      def read_dollar_paren(line, start_index) = Scanner.new(line, index: start_index).read_dollar_paren_body
    end
  end
end

View file

@ -1,8 +1,29 @@
require "shellwords"
require "open3"
require "shell/quote_cursor"
require "shell/string_parser"
module Shell
class WordExpander
ENV_VAR_REGEX = /\$(?:\{([^}]+)\}|(\w+)\b)/
ESCAPED_DOLLAR = "\u0001"
ESCAPED_BACKTICK = "\u0002"
GLOB_PATTERN = /[*?\[]/
SHELLSPLIT_PATTERN = /\G\s*(?>([^\0\s\\'"]+)|'([^\0']*)'|"((?:[^\0"\\]|\\[^\0])*)"|(\\[^\0]?)|(\S))(\s|\z)?/m
DOUBLE_QUOTE_ESCAPES_PATTERN = /\\([$`"\\\n])/
SINGLE_ESCAPE_PATTERN = /\\(.)/
TILDE_PREFIX_PATTERN = /^~([^\/]*)/
VARIABLE_FIRST_CHAR_PATTERN = /[A-Za-z_]/
VARIABLE_CHAR_PATTERN = /[A-Za-z0-9_]/
TRAILING_NEWLINES_PATTERN = /\n+\z/
ESCAPE_DOUBLE_QUOTED_SUBSTITUTION_PATTERN = /([\\"])/
ESCAPE_UNQUOTED_SUBSTITUTION_PATTERN = /(\\|["'])/
WHITESPACE_PATTERN = /\s/
DIGIT_PATTERN = /\d/
ARITHMETIC_IDENTIFIER_FIRST_PATTERN = /[A-Za-z_]/
ARITHMETIC_IDENTIFIER_PATTERN = /[A-Za-z0-9_]/
ARITHMETIC_OPERATOR_PATTERN = %r{[+\-*/()%]}
BRACE_EXPANSION_PATTERN = /(.*?)\{([^{}]*)\}(.*)/
SplitWord = Data.define(:text, :globbed)
CommandSubstitutionError = Class.new(StandardError)
# Splits the given line into multiple words, performing the following transformations:
#
@ -11,23 +32,33 @@ module Shell
# - Tilde expansion, which means that ~ is expanded to $HOME
# - Glob expansion on files and directories
def expand(line)
shellsplit(line)
.map do |word|
word
.gsub(ENV_VAR_REGEX) do
name = Regexp.last_match(2) || Regexp.last_match(1)
ENV.fetch(name)
protected_line = protect_escaped_dollars(line)
substituted_line = expand_command_substitution(protected_line)
shellsplit_tokens(substituted_line)
.flat_map do |word|
expanded = expand_variables(word.text)
.tr(ESCAPED_DOLLAR, "$")
.tr(ESCAPED_BACKTICK, "`")
expand_braces(expanded).map { SplitWord.new(text: it, globbed: word.globbed) }
end
.flat_map do |word|
if word.globbed
[word.text]
elsif GLOB_PATTERN.match?(word.text)
glob_words = expand_globs(word.text)
glob_words.empty? ? [word.text] : glob_words
else
[word.text]
end
# TODO: expand globs
end
end
# Lifted directly from Ruby 4.0.0.
# Adapted from Ruby's Shellwords splitting logic.
#
# Splits a string into an array of tokens in the same way the UNIX
# Bourne shell does.
#
# argv = Shellwords.split('here are "two words"')
# argv = shellsplit('here are "two words"')
# argv #=> ["here", "are", "two words"]
#
# +line+ must not contain NUL characters because of nature of
@ -37,7 +68,7 @@ module Shell
# metacharacters except for the single and double quotes and
# backslash are not treated as such.
#
# argv = Shellwords.split('ruby my_prog.rb | less')
# argv = shellsplit('ruby my_prog.rb | less')
# argv #=> ["ruby", "my_prog.rb", "|", "less"]
#
# String#shellsplit is a shortcut for this function.
@ -45,12 +76,15 @@ module Shell
# argv = 'here are "two words"'.shellsplit
# argv #=> ["here", "are", "two words"]
def shellsplit(line)
shellsplit_tokens(line).map(&:text)
end
def shellsplit_tokens(line)
words = []
field = "".dup
at_word_start = true
found_glob_char = false
line.scan(/\G\s*(?>([^\0\s\\'"]+)|'([^\0']*)'|"((?:[^\0"\\]|\\[^\0])*)"|(\\[^\0]?)|(\S))(\s|\z)?/m) do
|word, sq, dq, esc, garbage, sep|
line.scan(SHELLSPLIT_PATTERN) do |word, sq, dq, esc, garbage, sep|
if garbage
b = $~.begin(0)
line = $~[0]
@ -64,19 +98,28 @@ module Shell
# characters when considered special:
#
# $ ` " \ <newline>
field << (word || sq || (dq && dq.gsub(/\\([$`"\\\n])/, '\\1')) || esc.gsub(/\\(.)/, '\\1'))
found_glob_char = word && word =~ /[*?\[]/ # must be unquoted
field << (word || sq || (dq && dq.gsub(DOUBLE_QUOTE_ESCAPES_PATTERN, '\\1')) || esc.gsub(SINGLE_ESCAPE_PATTERN, '\\1'))
found_glob_char = word&.match?(GLOB_PATTERN) # must be unquoted
# Expand tildes at the beginning of unquoted words.
if word && at_word_start
field.sub!(/^~/, Dir.home)
field.sub!(TILDE_PREFIX_PATTERN) do
user = Regexp.last_match(1)
user.empty? ? Dir.home : Dir.home(user)
rescue ArgumentError
"~#{user}"
end
end
at_word_start = false
if sep
if found_glob_char
glob_words = expand_globs(field)
words += (glob_words.empty? ? [field] : glob_words)
if glob_words.empty?
words << SplitWord.new(text: field, globbed: false)
else
words << field
glob_words.each { words << SplitWord.new(text: it, globbed: true) }
end
else
words << SplitWord.new(text: field, globbed: false)
end
field = "".dup
at_word_start = true
@ -89,5 +132,445 @@ module Shell
def expand_globs(word)
Dir.glob(word)
end
# Replaces "$NAME" and "${...}" references in +value+ with their values.
#
# A "$" that is not followed by "{" or a valid first identifier character is
# copied through unchanged. Plain "$NAME" lookups use ENV.fetch, so an unset
# variable raises KeyError.
def expand_variables(value)
  result = +""
  pos = 0
  while pos < value.length
    char = value[pos]
    following = value[pos + 1]
    if char != "$"
      result << char
      pos += 1
    elsif following == "{"
      body, pos = read_braced_variable(value, pos + 2)
      result << resolve_braced_variable(body)
    elsif variable_char?(following, first: true)
      # Extend the name over every following identifier character.
      name_end = pos + 2
      name_end += 1 while name_end < value.length && variable_char?(value[name_end], first: false)
      result << ENV.fetch(value[(pos + 1)...name_end])
      pos = name_end
    else
      result << "$"
      pos += 1
    end
  end
  result
end
# Reads the body of a "${...}" expression. +start_index+ is the offset of the
# first character after "${"; nested braces are balanced.
#
# Returns [body, index_after_closing_brace].
# Raises ArgumentError when the closing brace is missing.
def read_braced_variable(value, start_index)
  nesting = 1
  (start_index...value.length).each do |pos|
    case value[pos]
    when "{"
      nesting += 1
    when "}"
      nesting -= 1
      return [value[start_index...pos], pos + 1] if nesting.zero?
    end
  end
  raise ArgumentError, "Unmatched ${...}"
end
# Resolves the body of a "${...}" expression.
#
# "NAME" is looked up with ENV.fetch (KeyError when unset). "NAME:-fallback"
# yields the variable when it is set and non-empty, otherwise the fallback
# with its own variable references expanded.
def resolve_braced_variable(raw)
  name, fallback = split_default_expression(raw)
  return ENV.fetch(name) unless fallback

  current = ENV[name]
  return current unless current.nil? || current.empty?

  expand_variables(fallback)
end
# Splits "NAME:-fallback" at the first ":-" that is not inside nested braces.
#
# Returns [name, fallback], or [raw, nil] when there is no top-level ":-".
def split_default_expression(raw)
  nesting = 0
  # The last character cannot start a two-character ":-", so stop before it.
  (0...(raw.length - 1)).each do |pos|
    case raw[pos]
    when "{"
      nesting += 1
    when "}"
      nesting -= 1 if nesting > 0
    when ":"
      return [raw[0...pos], raw[(pos + 2)..]] if nesting.zero? && raw[pos + 1] == "-"
    end
  end
  [raw, nil]
end
# Reports whether +char+ may appear in a variable name; +first+ selects the
# stricter pattern used for the name's first character. nil never matches.
def variable_char?(char, first:)
  return false if char.nil?

  pattern = first ? VARIABLE_FIRST_CHAR_PATTERN : VARIABLE_CHAR_PATTERN
  pattern.match?(char)
end
# Walks +line+ once and replaces backtick spans, "$(...)" and "$((...))" with
# their results, while tracking quote state through a QuoteCursor.
#
# Substitutions are performed outside quotes and inside double quotes; text
# inside single quotes is copied through untouched. Escaped "$" and "`" are
# replaced by the ESCAPED_DOLLAR / ESCAPED_BACKTICK placeholders.
#
# Returns the rewritten line. Errors raised by the readers and by
# run_command_substitution propagate to the caller.
def expand_command_substitution(line)
output = +""
i = 0
cursor = QuoteCursor.new
while i < line.length
c = line[i]
if cursor.unquoted?
case c
when "`"
# The reader returns the command text and the index after the closing backtick.
cmd, i = read_backtick(line, i + 1)
output << escape_substitution_output(run_command_substitution(cmd), :unquoted)
when "$"
if line[i + 1] == "("
if line[i + 2] == "("
# "$((" starts an arithmetic expansion; its result is appended without escaping.
expr, i = read_arithmetic(line, i + 3)
output << expand_arithmetic(expr)
else
cmd, i = read_dollar_paren(line, i + 2)
output << escape_substitution_output(run_command_substitution(cmd), :unquoted)
end
else
# A "$" that does not open a substitution is copied through unchanged.
output << c
i += 1
end
when "\\"
if i + 1 < line.length
escaped = line[i + 1]
if escaped == "$"
output << ESCAPED_DOLLAR
i += 2
elsif escaped == "`"
output << ESCAPED_BACKTICK
i += 2
else
# Any other escape pair is kept verbatim, as returned by the cursor.
segment, i = cursor.consume(line, i)
output << segment
end
else
segment, i = cursor.consume(line, i)
output << segment
end
else
# Ordinary characters, including opening quotes, go through the cursor so
# that the quote state is updated.
segment, i = cursor.consume(line, i)
output << segment
end
elsif cursor.state == :double_quoted
case c
when "\\"
if i + 1 < line.length
escaped = line[i + 1]
if escaped == "$" || escaped == "`"
output << escaped_replacement(escaped)
else
output << "\\"
output << escaped
end
# The pair is consumed without calling the cursor, so an escaped double
# quote does not end the double-quoted state.
i += 2
else
segment, i = cursor.consume(line, i)
output << segment
end
when "`"
cmd, i = read_backtick(line, i + 1)
output << escape_substitution_output(run_command_substitution(cmd), :double_quoted)
when "$"
if line[i + 1] == "("
if line[i + 2] == "("
expr, i = read_arithmetic(line, i + 3)
output << expand_arithmetic(expr)
else
cmd, i = read_dollar_paren(line, i + 2)
output << escape_substitution_output(run_command_substitution(cmd), :double_quoted)
end
else
segment, i = cursor.consume(line, i)
output << segment
end
else
segment, i = cursor.consume(line, i)
output << segment
end
else
# Remaining state (single-quoted): copy through with no substitution.
segment, i = cursor.consume(line, i)
output << segment
end
end
output
end
# Reads the command inside a backtick substitution. +start_index+ is the
# offset of the first character after the opening backtick.
#
# Inside backquotes a backslash is removed only when it precedes "$", "`" or
# another backslash; before any other character it is kept, so sequences such
# as "\n" reach the command unchanged. A backslash that is the last character
# of the line is kept as-is.
#
# Returns [command, index_after_closing_backtick].
# Raises ArgumentError when the closing backtick is missing.
def read_backtick(line, start_index)
  output = +""
  i = start_index
  while i < line.length
    c = line[i]
    return [output, i + 1] if c == "`"

    if c == "\\" && i + 1 < line.length
      escaped = line[i + 1]
      # Keep the backslash unless it escapes one of the three special characters.
      output << "\\" unless "$`\\".include?(escaped)
      output << escaped
      i += 2
    else
      output << c
      i += 1
    end
  end
  raise ArgumentError, "Unmatched backtick"
end
# Reads the body of a "$(...)" substitution by delegating to
# StringParser.read_dollar_paren with the same arguments and returning its
# result unchanged.
def read_dollar_paren(line, start_index)
StringParser.read_dollar_paren(line, start_index)
end
# Reads the expression inside "$((...))". +start_index+ is the offset of the
# first character after "$((".
#
# Inner parentheses are balanced; the expression ends at the first "))" seen
# while no inner group is open. A lone ")" at that level is kept as part of
# the expression.
#
# Returns [expression, index_after_closing_parens].
# Raises ArgumentError when the terminator is missing.
def read_arithmetic(line, start_index)
  nesting = 1
  (start_index...line.length).each do |pos|
    char = line[pos]
    if char == "("
      nesting += 1
    elsif char == ")"
      terminator = nesting == 1 && line[pos + 1] == ")"
      return [line[start_index...pos], pos + 2] if terminator

      # Never drop below the level of the expression itself.
      nesting -= 1 if nesting > 1
    end
  end
  raise ArgumentError, "Unmatched $((...))"
end
# Runs +command+ through "/bin/sh -c" and returns its standard output with
# trailing newlines removed and the remaining newlines turned into spaces.
#
# Raises CommandSubstitutionError when the command does not succeed; the
# message names the exit status (or terminating signal), the command, and any
# text written to standard error.
def run_command_substitution(command)
  out, err, status = Open3.capture3("/bin/sh", "-c", command)
  return out.sub(TRAILING_NEWLINES_PATTERN, "").tr("\n", " ") if status.success?

  code = status.exitstatus
  reason = code ? "exit #{code}" : "signal #{status.termsig}"
  message = "command substitution failed (#{reason}): #{command}"
  details = err.to_s.strip
  message = "#{message}: #{details}" unless details.empty?
  raise CommandSubstitutionError, message
end
# Prepares substitution output for re-insertion into the command line.
#
# Every "$" becomes the ESCAPED_DOLLAR placeholder. For the :double_quoted
# and :unquoted contexts, the characters matched by the corresponding
# pattern are additionally prefixed with a backslash; any other context
# returns the value with only the "$" replacement applied.
def escape_substitution_output(value, context)
  shielded = value.gsub("$", ESCAPED_DOLLAR)
  pattern = {
    double_quoted: ESCAPE_DOUBLE_QUOTED_SUBSTITUTION_PATTERN,
    unquoted: ESCAPE_UNQUOTED_SUBSTITUTION_PATTERN
  }[context]
  return shielded unless pattern

  shielded.gsub(pattern, '\\\\\1')
end
# Evaluates an arithmetic expression and returns the result as a string.
# The expression is tokenized, converted to postfix order, then evaluated.
def expand_arithmetic(expr)
  evaluate_rpn(arithmetic_to_rpn(tokenize_arithmetic(expr))).to_s
end
# Converts an arithmetic expression into [:number, Integer] and
# [:op, String] tokens.
#
# Whitespace is skipped. Identifiers are resolved through ENV and converted
# with to_i, so an unset or empty variable counts as 0.
#
# Raises ArgumentError on any character that is not whitespace, a digit, an
# identifier character or one of the supported operators.
def tokenize_arithmetic(expr)
  # Returns the index just past the run of characters matching +pattern+.
  run_end = lambda do |from, pattern|
    from += 1 while from < expr.length && expr[from].match?(pattern)
    from
  end

  tokens = []
  pos = 0
  while pos < expr.length
    char = expr[pos]
    if char.match?(WHITESPACE_PATTERN)
      pos += 1
    elsif char.match?(DIGIT_PATTERN)
      stop = run_end.call(pos + 1, DIGIT_PATTERN)
      tokens << [:number, expr[pos...stop].to_i]
      pos = stop
    elsif char.match?(ARITHMETIC_IDENTIFIER_FIRST_PATTERN)
      stop = run_end.call(pos + 1, ARITHMETIC_IDENTIFIER_PATTERN)
      # nil.to_i and "".to_i are both 0, which covers unset and empty variables.
      tokens << [:number, ENV[expr[pos...stop]].to_i]
      pos = stop
    elsif char.match?(ARITHMETIC_OPERATOR_PATTERN)
      tokens << [:op, char]
      pos += 1
    else
      raise ArgumentError, "Invalid arithmetic expression: #{expr}"
    end
  end
  tokens
end
# Converts infix arithmetic tokens to postfix (RPN) order using an operator
# stack.
#
# tokens: [:number, Integer] and [:op, String] pairs as produced by
#         tokenize_arithmetic.
#
# "-" and "+" are treated as unary ("u-" / "u+") at the start of the
# expression, after another operator, or after "(". Unary operators are
# prefix operators, so they are pushed without popping anything: popping an
# earlier unary operator first would emit it before its operand and make
# inputs such as "- -3" fail during evaluation.
#
# Returns the tokens in postfix order.
# Raises ArgumentError on unbalanced parentheses.
def arithmetic_to_rpn(tokens)
  output = []
  ops = []
  prev_type = nil
  tokens.each do |type, value|
    if type == :number
      output << [:number, value]
      prev_type = :number
      next
    end

    op = value
    if op == "("
      ops << op
      prev_type = :lparen
      next
    end

    if op == ")"
      # Flush operators back to the matching "(".
      while (top = ops.pop)
        break if top == "("
        output << [:op, top]
      end
      raise ArgumentError, "Unmatched ) in arithmetic expression" if top != "("
      prev_type = :rparen
      next
    end

    prefix_position = prev_type.nil? || prev_type == :op || prev_type == :lparen
    if prefix_position && (op == "-" || op == "+")
      ops << "u#{op}"
    else
      # Binary operators are left-associative: flush everything that binds at
      # least as tightly before pushing.
      output << [:op, ops.pop] while !ops.empty? && precedence(ops.last) >= precedence(op)
      ops << op
    end
    prev_type = :op
  end

  while (top = ops.pop)
    raise ArgumentError, "Unmatched ( in arithmetic expression" if top == "("
    output << [:op, top]
  end
  output
end

# Binding strength of an operator: unary 3, multiplicative 2, additive 1,
# anything else (including "(") 0.
def precedence(op)
  case op
  when "u+", "u-"
    3
  when "*", "/", "%"
    2
  when "+", "-"
    1
  else
    0
  end
end
# Evaluates postfix tokens and returns the resulting Integer.
#
# Raises ArgumentError when an operator lacks operands or when the expression
# does not reduce to exactly one value.
def evaluate_rpn(rpn)
  operands = rpn.each_with_object([]) do |(kind, payload), stack|
    if kind == :number
      stack.push(payload)
    elsif payload == "u+" || payload == "u-"
      raise ArgumentError, "Invalid arithmetic expression" if stack.empty?

      operand = stack.pop
      stack.push((payload == "u-") ? -operand : operand)
    else
      # The right-hand operand was pushed last, so it comes off first.
      right = stack.pop
      left = stack.pop
      raise ArgumentError, "Invalid arithmetic expression" if left.nil? || right.nil?

      stack.push(apply_operator(left, right, payload))
    end
  end
  raise ArgumentError, "Invalid arithmetic expression" unless operands.length == 1

  operands.first
end
# Applies the binary operator +op+ to the integers +a+ and +b+.
#
# Division and modulo truncate toward zero, as shell arithmetic does, rather
# than using Ruby's flooring "/" and "%": -7 / 2 is -3 and -7 % 2 is -1.
# Dividing or taking the remainder by zero yields 0 instead of raising.
#
# Raises ArgumentError for an unknown operator.
def apply_operator(a, b, op)
  case op
  when "+"
    a + b
  when "-"
    a - b
  when "*"
    a * b
  when "/"
    # Subtracting the truncating remainder first makes the division exact.
    b.zero? ? 0 : (a - a.remainder(b)) / b
  when "%"
    # Integer#remainder takes the sign of the dividend.
    b.zero? ? 0 : a.remainder(b)
  else
    raise ArgumentError, "Invalid arithmetic expression"
  end
end
# Simple, non-nested brace expansion: pre{a,b}post -> preapost, prebpost.
#
# Brace groups without a comma are left as literal text, and the search
# continues with the groups after them, so "{a}{b,c}" expands to "{a}b" and
# "{a}c". Several comma groups in one word are expanded recursively.
#
# Returns an array of words; [word] when nothing can be expanded.
def expand_braces(word)
  search_from = 0
  while (match = word.match(BRACE_EXPANSION_PATTERN, search_from))
    body = match[2]
    if body.include?(",")
      # Everything before the opening brace, including any groups skipped so far.
      prefix = word[0...(match.begin(2) - 1)]
      suffix = match[3]
      # Limit -1 keeps empty alternatives such as the first one in "{,x}".
      return body.split(",", -1).flat_map { |part| expand_braces(prefix + part + suffix) }
    end
    # Resume just past this group's closing brace.
    search_from = match.end(2) + 1
  end
  [word]
end
# Maps an escaped "$" or "`" to its placeholder constant; any other
# character is returned unchanged.
def escaped_replacement(char)
  return ESCAPED_DOLLAR if char == "$"
  return ESCAPED_BACKTICK if char == "`"

  char
end
# Replaces every escaped dollar sign with the ESCAPED_DOLLAR placeholder so
# that later expansion passes leave it alone.
#
# A "$" counts as escaped when it is preceded by an odd number of
# backslashes; the backslash doing the escaping is dropped and the remaining
# (even) run is kept. Runs of backslashes that do not escape a "$" are copied
# unchanged.
#
# The scan works on characters rather than bytes, so multibyte text before a
# backslash does not shift the positions being inspected.
#
# NOTE(review): quoting context is not considered here, so "\$" inside single
# quotes is replaced as well — confirm that this is intended.
def protect_escaped_dollars(line)
  line.gsub(/(\\+)(\$?)/) do
    slashes = Regexp.last_match(1)
    dollar = Regexp.last_match(2)
    if dollar == "$" && slashes.length.odd?
      slashes[0...-1] + ESCAPED_DOLLAR
    else
      slashes + dollar
    end
  end
end
end
end

View file

@ -1,9 +1,16 @@
require "minitest/autorun"
require "etc"
require "open3"
require "timeout"
$LOAD_PATH.unshift(File.expand_path("..", __dir__))
require_relative "../shell/job_control"
require_relative "../shell/logger"
class ShellTest < Minitest::Test
TRIVIAL_SHELL_SCRIPT = "#!/bin/sh\ntrue".freeze
A1_PATH = ENV.fetch("A1_PATH", "./a1").freeze
COMPAT_PROFILE = ENV["A1_TEST_PROFILE"] == "compat"
def setup
FileUtils.mkdir_p("test_bin")
@ -17,6 +24,12 @@ class ShellTest < Minitest::Test
"#!/bin/sh\necho '#{code}'"
end
def requires_extended_shell!(feature)
return unless COMPAT_PROFILE
skip "requires extended shell feature: #{feature}"
end
def test_expands_environment_variables
assert_equal Dir.home, `#{A1_PATH} -c 'echo $HOME'`.chomp
assert_equal Dir.home, `#{A1_PATH} -c 'echo ${HOME}'`.chomp
@ -39,6 +52,10 @@ class ShellTest < Minitest::Test
assert_equal "a b", `#{A1_PATH} -c 'echo \"a b\"'`.chomp
end
def test_respects_escaped_double_quote_in_double_quotes
assert_equal "a\"b", `#{A1_PATH} -c 'echo \"a\\\"b\"'`.chomp
end
def test_respects_single_quotes
assert_equal "a b", `#{A1_PATH} -c \"echo 'a b'\"`.chomp
end
@ -57,6 +74,189 @@ class ShellTest < Minitest::Test
FileUtils.rm_f("globtest_b.txt")
end
def test_does_not_reglob_expanded_paths
File.write("globspecial_a.txt", TRIVIAL_SHELL_SCRIPT)
File.write("globspecial_[a].txt", TRIVIAL_SHELL_SCRIPT)
output = `#{A1_PATH} -c 'echo globspecial_*.txt'`.chomp.split
assert_equal ["globspecial_[a].txt", "globspecial_a.txt"], output.sort
ensure
FileUtils.rm_f("globspecial_a.txt")
FileUtils.rm_f("globspecial_[a].txt")
end
def test_does_not_expand_escaped_dollar
assert_equal "$HOME", `#{A1_PATH} -c 'echo \\$HOME'`.chomp
end
def test_expands_brace_expansion
requires_extended_shell!("brace expansion")
assert_equal "a b", `#{A1_PATH} -c 'echo {a,b}'`.chomp
end
def test_expands_command_substitution_backticks
assert_equal "hi", %x(#{A1_PATH} -c 'echo `echo hi`').chomp
end
def test_expands_command_substitution_dollar_paren
assert_equal "hi", `#{A1_PATH} -c 'echo $(echo hi)'`.chomp
end
def test_keeps_control_operators_inside_command_substitution
requires_extended_shell!("nested command parsing in substitutions")
semicolon_stdout, semicolon_stderr, semicolon_status = Open3.capture3(A1_PATH, "-c", "echo $(echo hi; echo bye)")
assert semicolon_status.success?, semicolon_stderr
assert_equal "hi bye\n", semicolon_stdout
and_stdout, and_stderr, and_status = Open3.capture3(A1_PATH, "-c", "echo $(echo hi && echo bye)")
assert and_status.success?, and_stderr
assert_equal "hi bye\n", and_stdout
end
def test_expands_command_substitution_with_escaped_quote
requires_extended_shell!("escaped quote handling in substitutions")
assert_equal "a\"b", `#{A1_PATH} -c 'echo $(printf \"%s\" \"a\\\"b\")'`.chomp
end
def test_expands_arithmetic
assert_equal "3", `#{A1_PATH} -c 'echo $((1 + 2))'`.chomp
end
def test_expands_arithmetic_with_variables
requires_extended_shell!("arithmetic variable lookup")
assert_equal "3", `A1_NUM=2 #{A1_PATH} -c 'echo $((A1_NUM + 1))'`.chomp
end
def test_expands_tilde_user
user = Etc.getlogin
skip "no login user" unless user
assert_equal Dir.home(user), `#{A1_PATH} -c 'echo ~#{user}'`.chomp
end
def test_expands_parameter_default_value
requires_extended_shell!("${var:-fallback}")
assert_equal "fallback", `#{A1_PATH} -c 'echo ${A1_UNSET_VAR:-fallback}'`.chomp
end
def test_expands_parameter_default_value_with_variable_reference
requires_extended_shell!("${var:-$OTHER}")
assert_equal Dir.home, `#{A1_PATH} -c 'echo ${A1_UNSET_VAR:-$HOME}'`.chomp
end
def test_expands_parameter_default_value_with_command_substitution
requires_extended_shell!("${var:-$(...)}")
assert_equal "hi", `#{A1_PATH} -c 'echo ${A1_UNSET_VAR:-$(echo hi)}'`.chomp
end
# A glob pattern supplied as a ${var:-pattern} default should still be
# expanded against the filesystem. Two fixture files are created in the
# current directory and removed again in the ensure clause.
def test_expands_glob_from_parameter_default_value
  # Assigned before the gate so the ensure clause can always see the names.
  fixtures = ["default_glob_a.txt", "default_glob_b.txt"]
  requires_extended_shell!("glob expansion from parameter defaults")

  fixtures.each { |name| File.write(name, TRIVIAL_SHELL_SCRIPT) }
  raw = `#{A1_PATH} -c 'printf "%s\n" ${A1_UNSET_GLOB_VAR:-default_glob_*.txt}'`
  # Sort so the assertion does not depend on the order matches are emitted.
  matched = raw.lines.map(&:chomp).sort
  assert_equal fixtures, matched
ensure
  fixtures.each { |name| FileUtils.rm_f(name) }
end
# A failing command inside $(...) should make the shell exit nonzero and
# report the failure on stderr, mentioning the failed command ("exit 7"),
# rather than surfacing a misleading "No such file or directory" message.
def test_reports_command_substitution_failure_with_status
  requires_extended_shell!("command substitution error propagation")

  _out, err, result = Open3.capture3(A1_PATH, "-c", "echo $(exit 7)")

  refute result.success?
  [/command substitution failed/, /exit 7/].each do |expected|
    assert_match(expected, err)
  end
  refute_match(/No such file or directory/, err)
end
# Three levels of nested ${var:-...} defaults whose innermost fallback is a
# command substitution that contains an arithmetic expansion.
def test_expands_nested_defaults_with_substitution_and_arithmetic
  requires_extended_shell!("nested defaults and arithmetic")

  # The script contains no single quotes, so it can be wrapped in them below.
  script = 'echo ${A1_OUTER_UNSET:-${A1_MIDDLE_UNSET:-${A1_INNER_UNSET:-$(printf "%s" "calc_$((2+3))")}}}'
  output = `#{A1_PATH} -c '#{script}'`
  assert_equal "calc_5", output.chomp
end
# Compares the shell under test against /bin/sh for runs of one to four
# backslashes placed directly before `$HOME` and before a backtick
# substitution. Exit success and stdout must both match /bin/sh.
def test_matches_sh_backslash_parity_before_dollar_and_backticks
  ["$HOME", "`echo hi`"].each do |target|
    (1..4).each do |count|
      backslashes = "\\" * count
      command = "printf \"%s\\n\" #{backslashes}#{target}"

      actual_out, _actual_err, actual_status = Open3.capture3(A1_PATH, "-c", command)
      reference_out, _reference_err, reference_status = Open3.capture3("/bin/sh", "-c", command)

      assert_equal reference_status.success?, actual_status.success?, "status mismatch for #{command.inspect}"
      assert_equal reference_out, actual_out, "stdout mismatch for #{command.inspect}"
    end
  end
end
# Inside double quotes, a backslash-escaped `$(` must be printed literally
# instead of starting a command substitution.
def test_does_not_expand_escaped_command_substitution_dollar_paren_in_double_quotes
  output = `#{A1_PATH} -c 'echo "\\$(echo hi)"'`
  assert_equal "$(echo hi)", output.chomp
end
# Inside double quotes, backslash-escaped backticks must be printed
# literally instead of starting a command substitution.
def test_does_not_expand_escaped_command_substitution_backticks_in_double_quotes
  output = %x(#{A1_PATH} -c 'echo "\\`echo hi\\`"')
  assert_equal "`echo hi`", output.chomp
end
# Runs a single printf whose arguments combine a parameter default with a
# nested substitution and arithmetic, a substitution that yields a glob, a
# quoted substitution containing a space, a brace expansion, and a tilde.
# Seven output lines are expected, one per resulting word.
def test_combines_expansions_in_defaults_and_subcommands
  # Assigned before the gate so the ensure clause can always see the names.
  fixtures = ["combo_a.txt", "combo_b.txt"]
  requires_extended_shell!("composed substitutions and defaults")

  fixtures.each { |name| File.write(name, TRIVIAL_SHELL_SCRIPT) }

  words = [
    "printf \"<%s>\\n\"",
    "${A1_UNSET_COMPLEX_TEST_VAR:-$(printf \"%s\" \"default_$((1+2))\")}",
    "$(printf \"%s\" \"combo_*.txt\")",
    "\"$(printf \"%s\" \"quoted value\")\"",
    "{left,right}",
    "~"
  ]
  script = words.join(" ")
  lines = `#{A1_PATH} -c '#{script}'`.lines.map(&:chomp)

  assert_equal "<default_3>", lines[0]
  # The two glob matches are sorted so their emission order does not matter.
  assert_equal ["<combo_a.txt>", "<combo_b.txt>"], lines[1, 2].sort
  assert_equal "<quoted value>", lines[3]
  assert_equal "<left>", lines[4]
  assert_equal "<right>", lines[5]
  assert_equal "<#{Dir.home}>", lines[6]
  assert_equal 7, lines.length
ensure
  fixtures.each { |name| FileUtils.rm_f(name) }
end
# An unterminated double quote must make the shell fail without printing
# anything that looks like a Ruby backtrace frame ("file.rb:123:in ").
def test_reports_parse_errors_without_ruby_backtrace
  _out, err, result = Open3.capture3(A1_PATH, "-c", "echo \"unterminated")

  refute result.success?
  refute_match(/\.rb:\d+:in /, err)
end
# `export` with no arguments is expected to fail, but stderr must not
# contain the text of an uncaught NoMethodError.
def test_export_without_args_does_not_raise_nomethoderror
  _out, err, result = Open3.capture3(A1_PATH, "-c", "export")

  refute result.success?
  refute_match(/NoMethodError|undefined method/, err)
end
# `bg` with no command must exit nonzero and print its usage line on stderr.
def test_bg_without_command_reports_usage_error
  _out, err, result = Open3.capture3(A1_PATH, "-c", "bg")

  refute result.success?
  assert_match(/Usage: bg <command>/, err)
end
# `&&` with nothing on its left or on its right must be rejected: the
# shell exits nonzero and stderr mentions "syntax" (case-insensitive).
def test_rejects_empty_command_around_and_operator
  requires_extended_shell!("top-level && parsing")

  ["&& echo hi", "echo hi &&"].each do |script|
    _out, err, result = Open3.capture3(A1_PATH, "-c", script)
    refute result.success?
    assert_match(/syntax/i, err)
  end
end
#################################
### Execution and job control ###
#################################
@ -100,7 +300,21 @@ class ShellTest < Minitest::Test
end
# Verifies that the refresh_line callback given to Shell::JobControl is
# invoked after a command is run in the background.
#
# The leftover `skip "unimplemented"` that used to open this method was
# removed: it made the whole body below unreachable.
def test_refreshes_readline_after_bg_execution
  called = false
  job_control = Shell::JobControl.new(
    logger: Shell::Logger.instance,
    refresh_line: -> { called = true }
  )
  # The return value is handed back to Signal.trap in the ensure clause --
  # presumably the CHLD handler that was installed before; verify against
  # Shell::JobControl#trap_sigchld.
  previous = job_control.trap_sigchld
  begin
    job_control.exec_command("echo", ["hello"], background: true)
    # Poll until the callback fires; Timeout raises if it takes over 2 seconds.
    Timeout.timeout(2) do
      sleep 0.01 until called
    end
    assert called
  ensure
    Signal.trap("CHLD", previous)
  end
end
#########################
@ -108,19 +322,23 @@ class ShellTest < Minitest::Test
#########################
# `cd` with no arguments should change to the home directory, observed via
# $PWD in a follow-up command sequenced with `;`.
#
# The stale unconditional `skip` that preceded the requires_extended_shell!
# gate was removed so the assertion can actually run.
def test_builtin_cd_no_args
  requires_extended_shell!("multi-command sequencing with ;")
  assert_equal Dir.home, `#{A1_PATH} -c 'cd; echo $PWD'`.strip
end
# `cd <dir>` should change into the named directory. The scratch directory
# "blah" is created and removed by the shell command itself.
#
# The stale unconditional `skip` that preceded the requires_extended_shell!
# gate was removed so the assertion can actually run.
def test_builtin_cd
  requires_extended_shell!("multi-command sequencing with ;")
  assert_equal File.join(Dir.pwd, "blah"), `#{A1_PATH} -c 'mkdir -p blah; cd blah; echo $PWD; cd ..; rm -rf blah'`.strip
end
# `cd -` should return to the previous working directory, so $PWD ends up
# equal to the directory the test started in.
#
# The stale unconditional `skip` that preceded the requires_extended_shell!
# gate was removed so the assertion can actually run.
def test_builtin_cd_dash
  requires_extended_shell!("multi-command sequencing with ;")
  assert_equal Dir.pwd, `#{A1_PATH} -c 'mkdir -p blah; cd blah; cd -; rm -rf blah; echo $PWD'`.strip
end
# `cd ..` should move back up to the parent directory, so $PWD ends up
# equal to the directory the test started in.
#
# The stale unconditional `skip` that preceded the requires_extended_shell!
# gate was removed so the assertion can actually run.
def test_builtin_cd_parent
  requires_extended_shell!("multi-command sequencing with ;")
  assert_equal Dir.pwd, `#{A1_PATH} -c 'mkdir -p blah; cd blah; cd ..; rm -rf blah; echo $PWD'`.strip
end
def test_builtin_pwd