diff --git a/ruby/shell/string_parser.rb b/ruby/shell/string_parser.rb index 0379b1a..d64ce72 100644 --- a/ruby/shell/string_parser.rb +++ b/ruby/shell/string_parser.rb @@ -1,76 +1,368 @@ -require "shell/quote_cursor" - module Shell class StringParser - class << self - def split_commands(line) - commands = [] - command = +"" - cursor = QuoteCursor.new - next_op = :always - i = 0 + Token = Struct.new(:type, :value, keyword_init: true) - while i < line.length - c = line[i] - if cursor.unquoted? - case c - when ";" - commands << {command: command, op: next_op} - command = +"" - next_op = :always - i += 1 - next - when "&" - if line[i + 1] == "&" - if command.strip.empty? - raise ArgumentError, "syntax error near unexpected token `&&`" - end - commands << {command: command, op: next_op} - command = +"" - next_op = :and - i += 2 - next - end - end - end - - segment, i = cursor.consume(line, i) - command << segment - end - - if next_op == :and && command.strip.empty? - raise ArgumentError, "syntax error: expected command after `&&`" - end - - commands << {command: command, op: next_op} - commands + class Scanner + def initialize(line, index: 0) + @line = line + @index = index end - def read_dollar_paren(line, start_index) - output = +"" - i = start_index - depth = 1 - cursor = QuoteCursor.new + attr_reader :index - while i < line.length - c = line[i] + def tokenize_command_list + tokens = [] + segment_start = index - if cursor.unquoted? - case c - when "(" - depth += 1 - when ")" - depth -= 1 - return [output, i + 1] if depth.zero? - end + until eof? + c = current_char + if c == ";" + tokens << Token.new(type: :text, value: @line[segment_start...index]) + tokens << Token.new(type: :separator, value: :always) + advance + segment_start = index + next end - segment, i = cursor.consume(line, i) - output << segment + if c == "&" && peek(1) == "&" + tokens << Token.new(type: :text, value: @line[segment_start...index]) + tokens << Token.new(type: :separator, value: :and) + advance(2) + segment_start = index + next + end + + case c + when "\\" + advance_escape + when "'" + skip_single_quoted + when "\"" + skip_double_quoted + when "`" + skip_backtick + when "$" + if peek(1) == "(" + if peek(2) == "(" + skip_arithmetic_substitution + else + skip_command_substitution + end + else + advance + end + else + advance + end + end + + tokens << Token.new(type: :text, value: @line[segment_start...index]) + tokens + end + + # Reads the contents and end-index for a command substitution body, where + # index points to the first character after "$(". + def read_dollar_paren_body + output = +"" + depth = 1 + + until eof? + c = current_char + + if c == "\\" + output << read_escape + next + end + + if c == "'" + output << read_single_quoted + next + end + + if c == "\"" + output << read_double_quoted + next + end + + if c == "`" + output << read_backtick + next + end + + if c == "$" && peek(1) == "(" + if peek(2) == "(" + output << read_arithmetic_substitution + else + output << "$(" + advance(2) + depth += 1 + end + next + end + + if c == "(" + output << c + depth += 1 + advance + next + end + + if c == ")" + depth -= 1 + if depth.zero? + return [output, index + 1] + end + output << c + advance + next + end + + output << c + advance end raise ArgumentError, "Unmatched $(...)" end + + private + + def eof? + index >= @line.length + end + + def current_char + @line[index] + end + + def peek(offset) + @line[index + offset] + end + + def advance(count = 1) + @index += count + end + + def advance_escape + advance + advance unless eof? + end + + def skip_single_quoted + advance # opening quote + until eof? + if current_char == "'" + advance + return + end + advance + end + raise ArgumentError, "Unmatched quote" + end + + def skip_double_quoted + advance # opening quote + until eof? + c = current_char + case c + when "\\" + advance_escape + when "\"" + advance + return + when "`" + skip_backtick + when "$" + if peek(1) == "(" + if peek(2) == "(" + skip_arithmetic_substitution + else + skip_command_substitution + end + else + advance + end + else + advance + end + end + raise ArgumentError, "Unmatched quote" + end + + def skip_backtick + advance # opening backtick + until eof? + c = current_char + case c + when "\\" + advance_escape + when "`" + advance + return + when "$" + if peek(1) == "(" + if peek(2) == "(" + skip_arithmetic_substitution + else + skip_command_substitution + end + else + advance + end + else + advance + end + end + raise ArgumentError, "Unmatched backtick" + end + + def skip_command_substitution + advance(2) # consume "$(" + depth = 1 + + until eof? + c = current_char + case c + when "\\" + advance_escape + when "'" + skip_single_quoted + when "\"" + skip_double_quoted + when "`" + skip_backtick + when "$" + if peek(1) == "(" + if peek(2) == "(" + skip_arithmetic_substitution + else + advance(2) + depth += 1 + end + else + advance + end + when "(" + advance + depth += 1 + when ")" + advance + depth -= 1 + return if depth.zero? + else + advance + end + end + + raise ArgumentError, "Unmatched $(...)" + end + + def skip_arithmetic_substitution + advance(3) # consume "$((" + depth = 1 + + until eof? + c = current_char + case c + when "\\" + advance_escape + when "'" + skip_single_quoted + when "\"" + skip_double_quoted + when "`" + skip_backtick + when "$" + if peek(1) == "(" + if peek(2) == "(" + advance(3) + depth += 1 + else + skip_command_substitution + end + else + advance + end + when ")" + if peek(1) == ")" + advance(2) + depth -= 1 + return if depth.zero? + else + advance + end + else + advance + end + end + + raise ArgumentError, "Unmatched $((...))" + end + + def read_escape + start = index + advance_escape + @line[start...index] + end + + def read_single_quoted + start = index + skip_single_quoted + @line[start...index] + end + + def read_double_quoted + start = index + skip_double_quoted + @line[start...index] + end + + def read_backtick + start = index + skip_backtick + @line[start...index] + end + + def read_arithmetic_substitution + start = index + skip_arithmetic_substitution + @line[start...index] + end + end + + class << self + def split_commands(line) + commands = [] + next_op = :always + tokens = Scanner.new(line).tokenize_command_list + + tokens.each do |token| + case token.type + when :text + commands << {command: token.value, op: next_op} + if next_op == :and && token.value.strip.empty? + raise ArgumentError, "syntax error: expected command after `&&`" + end + next_op = :always + when :separator + if token.value == :and + if commands.empty? || commands.last[:command].strip.empty? + raise ArgumentError, "syntax error near unexpected token `&&`" + end + next_op = :and + else + next_op = :always + end + else + raise ArgumentError, "Unknown token type: #{token.type}" + end + end + + commands + end + + def read_dollar_paren(line, start_index) + Scanner.new(line, index: start_index).read_dollar_paren_body + end end end end diff --git a/ruby/shell/word_expander.rb b/ruby/shell/word_expander.rb index 9889289..b02308f 100644 --- a/ruby/shell/word_expander.rb +++ b/ruby/shell/word_expander.rb @@ -1,4 +1,3 @@ -require "shellwords" require "open3" require "shell/quote_cursor" require "shell/string_parser" @@ -54,12 +53,12 @@ module Shell end end - # Lifted directly from Ruby 4.0.0. + # Adapted from Ruby's Shellwords splitting logic. # # Splits a string into an array of tokens in the same way the UNIX # Bourne shell does. # - # argv = Shellwords.split('here are "two words"') + # argv = shellsplit('here are "two words"') # argv #=> ["here", "are", "two words"] # # +line+ must not contain NUL characters because of nature of @@ -69,7 +68,7 @@ module Shell # metacharacters except for the single and double quotes and # backslash are not treated as such. # - # argv = Shellwords.split('ruby my_prog.rb | less') + # argv = shellsplit('ruby my_prog.rb | less') # argv #=> ["ruby", "my_prog.rb", "|", "less"] # # String#shellsplit is a shortcut for this function. diff --git a/ruby/test/shell_test.rb b/ruby/test/shell_test.rb index fa8a141..41bcb03 100644 --- a/ruby/test/shell_test.rb +++ b/ruby/test/shell_test.rb @@ -93,6 +93,16 @@ class ShellTest < Minitest::Test assert_equal "hi", `#{A1_PATH} -c 'echo $(echo hi)'`.chomp end + def test_keeps_control_operators_inside_command_substitution + semicolon_stdout, semicolon_stderr, semicolon_status = Open3.capture3(A1_PATH, "-c", "echo $(echo hi; echo bye)") + assert semicolon_status.success?, semicolon_stderr + assert_equal "hi bye\n", semicolon_stdout + + and_stdout, and_stderr, and_status = Open3.capture3(A1_PATH, "-c", "echo $(echo hi && echo bye)") + assert and_status.success?, and_stderr + assert_equal "hi bye\n", and_stdout + end + def test_expands_command_substitution_with_escaped_quote assert_equal "a\"b", `#{A1_PATH} -c 'echo $(printf \"%s\" \"a\\\"b\")'`.chomp end