Extract a string parser and fix a few parsing bugs

This commit is contained in:
Sami Samhuri 2026-02-07 11:52:42 -08:00
parent 35fc514a14
commit c94e4c87e2
No known key found for this signature in database
4 changed files with 171 additions and 111 deletions

View file

@ -8,6 +8,7 @@ require "shell/builtins"
require "shell/colours"
require "shell/job_control"
require "shell/logger"
require "shell/string_parser"
require "shell/word_expander"
module Shell
@ -80,74 +81,7 @@ module Shell
end
def parse_line(line)
commands = []
command = "".dup
state = :unquoted
next_op = :always
i = 0
while i < line.length
c = line[i]
case state
when :unquoted
case c
when ";"
commands << {command: command, op: next_op}
command = "".dup
next_op = :always
i += 1
next
when "&"
if line[i + 1] == "&"
commands << {command: command, op: next_op}
command = "".dup
next_op = :and
i += 2
next
else
command << c
end
when "'"
command << c
state = :single_quoted
when "\""
command << c
state = :double_quoted
when "\\"
command << c
state = :escaped
else
command << c
end
when :single_quoted
command << c
state = :unquoted if c == "'"
when :double_quoted
case c
when "\\"
state = :double_quoted_escape
else
command << c
end
state = :unquoted if c == "\""
when :double_quoted_escape
command << "\\"
command << c
state = :double_quoted
when :escaped
command << c
state = :unquoted
else
raise "Unknown state #{state}"
end
i += 1
end
commands << {command: command, op: next_op}
commands
StringParser.split_commands(line)
end
end
end

132
ruby/shell/string_parser.rb Normal file
View file

@ -0,0 +1,132 @@
module Shell
  # Quote-aware scanners over raw shell input, exposed as class-level methods.
  class StringParser
    class << self
      # Splits +line+ into commands at top-level `;` and `&&`.
      #
      # Separators are ignored inside single quotes, inside double quotes,
      # directly after a backslash, and inside a balanced `$(...)` span (which
      # also covers `$((...))`). Command text is copied verbatim: quotes,
      # backslashes and surrounding whitespace are all preserved.
      #
      # Returns an Array of Hashes `{command: String, op: Symbol}` where +op+ is
      # the operator that preceded the command: `:always` for the first command
      # and after `;`, `:and` after `&&`. A trailing separator therefore yields
      # a final entry whose command is the empty string.
      def split_commands(line)
        commands = []
        command = +""
        state = :unquoted
        next_op = :always
        i = 0
        while i < line.length
          c = line[i]

          # Keep a balanced $(...) in one piece so that `;` and `&&` inside the
          # substitution do not end the outer command. Not applied after a
          # backslash or inside single quotes, where `$` is literal.
          if c == "$" && line[i + 1] == "(" && (state == :unquoted || state == :double_quoted)
            stop = substitution_end(line, i + 2)
            if stop
              command << line[i...stop]
              i = stop
              next
            end
            # Unterminated substitution: fall through and scan it as plain text.
          end

          case state
          when :unquoted
            case c
            when ";"
              commands << {command: command, op: next_op}
              command = +""
              next_op = :always
              i += 1
              next
            when "&"
              if line[i + 1] == "&"
                commands << {command: command, op: next_op}
                command = +""
                next_op = :and
                i += 2
                next
              end
              # A lone `&` is not a separator here; it is copied below.
            when "'"
              state = :single_quoted
            when "\""
              state = :double_quoted
            when "\\"
              state = :escaped
            end
            command << c
          when :single_quoted
            command << c
            state = :unquoted if c == "'"
          when :double_quoted
            command << c
            if c == "\\"
              state = :double_quoted_escape
            elsif c == "\""
              state = :unquoted
            end
          when :double_quoted_escape
            # The character after a backslash never closes the double quotes.
            command << c
            state = :double_quoted
          when :escaped
            command << c
            state = :unquoted
          else
            raise "Unknown state #{state}"
          end
          i += 1
        end
        commands << {command: command, op: next_op}
        commands
      end

      # Reads the body of a `$(...)` substitution.
      #
      # +start_index+ must point at the first character after the opening `$(`.
      # Nested parentheses are balanced; parentheses inside single or double
      # quotes are not counted, and a backslash outside single quotes copies
      # the following character without interpreting it.
      #
      # Returns `[body, next_index]`, where +body+ is the verbatim text between
      # the opening `$(` and its matching `)`, and +next_index+ is the index
      # just past that `)`.
      #
      # Raises ArgumentError when the end of +line+ is reached first.
      def read_dollar_paren(line, start_index)
        output = +""
        i = start_index
        depth = 1
        state = :unquoted
        while i < line.length
          c = line[i]
          case state
          when :unquoted
            case c
            when "("
              depth += 1
              output << c
            when ")"
              depth -= 1
              return [output, i + 1] if depth.zero?
              output << c
            when "'"
              output << c
              state = :single_quoted
            when "\""
              output << c
              state = :double_quoted
            when "\\"
              output << c
              # Copy the escaped character too so it cannot change depth or state.
              if i + 1 < line.length
                output << line[i + 1]
                i += 1
              end
            else
              output << c
            end
          when :single_quoted
            output << c
            state = :unquoted if c == "'"
          when :double_quoted
            if c == "\\"
              output << c
              # An escaped character (e.g. `\"`) must not close the quotes.
              if i + 1 < line.length
                output << line[i + 1]
                i += 1
              end
            else
              output << c
              state = :unquoted if c == "\""
            end
          else
            raise "Unknown state #{state}"
          end
          i += 1
        end
        raise ArgumentError, "Unmatched $(...)"
      end

      private

      # Index just past the `)` matching a `$(` whose body starts at
      # +start_index+, or nil when the substitution is unterminated.
      def substitution_end(line, start_index)
        read_dollar_paren(line, start_index).last
      rescue ArgumentError
        nil
      end
    end
  end
end

View file

@ -1,5 +1,6 @@
require "shellwords"
require "open3"
require "shell/string_parser"
module Shell
class WordExpander
@ -24,14 +25,6 @@ module Shell
.tr(ESCAPED_BACKTICK, "`")
expand_braces(expanded)
end
.flat_map do |word|
if /[*?\[]/.match?(word)
glob_words = expand_globs(word)
glob_words.empty? ? [word] : glob_words
else
[word]
end
end
end
# Lifted directly from Ruby 4.0.0.
@ -139,7 +132,7 @@ module Shell
i += 1
when "`"
cmd, i = read_backtick(line, i + 1)
output << run_command_substitution(cmd)
output << escape_substitution_output(run_command_substitution(cmd), :unquoted)
when "$"
if line[i + 1] == "("
if line[i + 2] == "("
@ -147,7 +140,7 @@ module Shell
output << expand_arithmetic(expr)
else
cmd, i = read_dollar_paren(line, i + 2)
output << run_command_substitution(cmd)
output << escape_substitution_output(run_command_substitution(cmd), :unquoted)
end
else
output << c
@ -189,7 +182,7 @@ module Shell
when "\\"
if i + 1 < line.length
escaped = line[i + 1]
if escaped == "$" || escaped == "`" || escaped == "\\" || escaped == "\""
if escaped == "$" || escaped == "`"
output << escaped_replacement(escaped)
else
output << "\\"
@ -202,7 +195,7 @@ module Shell
end
when "`"
cmd, i = read_backtick(line, i + 1)
output << run_command_substitution(cmd)
output << escape_substitution_output(run_command_substitution(cmd), :double_quoted)
when "$"
if line[i + 1] == "("
if line[i + 2] == "("
@ -210,7 +203,7 @@ module Shell
output << expand_arithmetic(expr)
else
cmd, i = read_dollar_paren(line, i + 2)
output << run_command_substitution(cmd)
output << escape_substitution_output(run_command_substitution(cmd), :double_quoted)
end
else
output << c
@ -247,36 +240,7 @@ module Shell
end
def read_dollar_paren(line, start_index)
output = +""
i = start_index
depth = 1
state = :unquoted
while i < line.length
c = line[i]
case state
when :unquoted
case c
when "("
depth += 1
when ")"
depth -= 1
return [output, i + 1] if depth.zero?
when "'"
state = :single_quoted
when "\""
state = :double_quoted
end
output << c
when :single_quoted
output << c
state = :unquoted if c == "'"
when :double_quoted
output << c
state = :unquoted if c == "\""
end
i += 1
end
raise ArgumentError, "Unmatched $(...)"
StringParser.read_dollar_paren(line, start_index)
end
def read_arithmetic(line, start_index)
@ -315,6 +279,18 @@ module Shell
stdout.tr("\n", " ")
end
# Escapes the output of a command substitution before it is spliced back
# into the line being expanded.
#
# Every "$" in +value+ is replaced with ESCAPED_DOLLAR. Then, depending on
# +context+:
#   :double_quoted - backslashes and double quotes get a backslash prefix
#   :unquoted      - backslashes, double quotes and single quotes get one
#   anything else  - no further escaping
#
# NOTE(review): ESCAPED_DOLLAR is presumably a placeholder that a later pass
# turns back into a literal "$" - confirm against its definition.
def escape_substitution_output(value, context)
  specials =
    case context
    when :double_quoted then /([\\"])/
    when :unquoted then /(\\|["'])/
    end

  dollar_safe = value.gsub("$", ESCAPED_DOLLAR)
  return dollar_safe unless specials

  # The replacement is a literal backslash followed by the matched character.
  dollar_safe.gsub(specials, '\\\\\1')
end
def expand_arithmetic(expr)
tokens = tokenize_arithmetic(expr)
rpn = arithmetic_to_rpn(tokens)

View file

@ -44,6 +44,10 @@ class ShellTest < Minitest::Test
assert_equal "a b", `#{A1_PATH} -c 'echo \"a b\"'`.chomp
end
# A backslash-escaped double quote inside a double-quoted word must reach
# echo as a literal quote character.
def test_respects_escaped_double_quote_in_double_quotes
  actual = `#{A1_PATH} -c 'echo \"a\\\"b\"'`.chomp
  assert_equal "a\"b", actual
end
# A single-quoted word containing a space is passed to echo as one argument.
def test_respects_single_quotes
  actual = `#{A1_PATH} -c \"echo 'a b'\"`.chomp
  assert_equal "a b", actual
end
@ -62,6 +66,16 @@ class ShellTest < Minitest::Test
FileUtils.rm_f("globtest_b.txt")
end
# A path produced by glob expansion that itself contains glob characters
# ("[a]") must be echoed as-is rather than being expanded a second time.
def test_does_not_reglob_expanded_paths
  plain = "globspecial_a.txt"
  bracketed = "globspecial_[a].txt"
  [plain, bracketed].each { |path| File.write(path, TRIVIAL_SHELL_SCRIPT) }

  echoed = `#{A1_PATH} -c 'echo globspecial_*.txt'`.chomp.split
  assert_equal [bracketed, plain], echoed.sort
ensure
  # Always remove the fixtures, even when the assertion fails.
  ["globspecial_a.txt", "globspecial_[a].txt"].each { |path| FileUtils.rm_f(path) }
end
# A backslash before "$" suppresses variable expansion, so the variable name
# is printed literally.
def test_does_not_expand_escaped_dollar
  actual = `#{A1_PATH} -c 'echo \\$HOME'`.chomp
  assert_equal "$HOME", actual
end
@ -78,6 +92,10 @@ class ShellTest < Minitest::Test
assert_equal "hi", `#{A1_PATH} -c 'echo $(echo hi)'`.chomp
end
# An escaped double quote inside a quoted argument within $(...) must survive
# both the substitution and the outer expansion.
def test_expands_command_substitution_with_escaped_quote
  actual = `#{A1_PATH} -c 'echo $(printf \"%s\" \"a\\\"b\")'`.chomp
  assert_equal "a\"b", actual
end
# $((...)) is evaluated as arithmetic and replaced by its result.
def test_expands_arithmetic
  actual = `#{A1_PATH} -c 'echo $((1 + 2))'`.chomp
  assert_equal "3", actual
end