mirror of
https://github.com/samsonjs/csc360-a1-shell.git
synced 2026-03-25 08:45:52 +00:00
Replace command splitter with tokenizer parser
This commit is contained in:
parent
6a29c97314
commit
beee5deca5
3 changed files with 365 additions and 64 deletions
|
|
@ -1,76 +1,368 @@
|
|||
require "shell/quote_cursor"
|
||||
|
||||
module Shell
|
||||
class StringParser
|
||||
class << self
|
||||
def split_commands(line)
|
||||
commands = []
|
||||
command = +""
|
||||
cursor = QuoteCursor.new
|
||||
next_op = :always
|
||||
i = 0
|
||||
Token = Struct.new(:type, :value, keyword_init: true)
|
||||
|
||||
while i < line.length
|
||||
c = line[i]
|
||||
if cursor.unquoted?
|
||||
case c
|
||||
when ";"
|
||||
commands << {command: command, op: next_op}
|
||||
command = +""
|
||||
next_op = :always
|
||||
i += 1
|
||||
next
|
||||
when "&"
|
||||
if line[i + 1] == "&"
|
||||
if command.strip.empty?
|
||||
raise ArgumentError, "syntax error near unexpected token `&&`"
|
||||
end
|
||||
commands << {command: command, op: next_op}
|
||||
command = +""
|
||||
next_op = :and
|
||||
i += 2
|
||||
next
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
segment, i = cursor.consume(line, i)
|
||||
command << segment
|
||||
end
|
||||
|
||||
if next_op == :and && command.strip.empty?
|
||||
raise ArgumentError, "syntax error: expected command after `&&`"
|
||||
end
|
||||
|
||||
commands << {command: command, op: next_op}
|
||||
commands
|
||||
class Scanner
|
||||
def initialize(line, index: 0)
|
||||
@line = line
|
||||
@index = index
|
||||
end
|
||||
|
||||
def read_dollar_paren(line, start_index)
|
||||
output = +""
|
||||
i = start_index
|
||||
depth = 1
|
||||
cursor = QuoteCursor.new
|
||||
attr_reader :index
|
||||
|
||||
while i < line.length
|
||||
c = line[i]
|
||||
def tokenize_command_list
|
||||
tokens = []
|
||||
segment_start = index
|
||||
|
||||
if cursor.unquoted?
|
||||
case c
|
||||
when "("
|
||||
depth += 1
|
||||
when ")"
|
||||
depth -= 1
|
||||
return [output, i + 1] if depth.zero?
|
||||
end
|
||||
until eof?
|
||||
c = current_char
|
||||
if c == ";"
|
||||
tokens << Token.new(type: :text, value: @line[segment_start...index])
|
||||
tokens << Token.new(type: :separator, value: :always)
|
||||
advance
|
||||
segment_start = index
|
||||
next
|
||||
end
|
||||
|
||||
segment, i = cursor.consume(line, i)
|
||||
output << segment
|
||||
if c == "&" && peek(1) == "&"
|
||||
tokens << Token.new(type: :text, value: @line[segment_start...index])
|
||||
tokens << Token.new(type: :separator, value: :and)
|
||||
advance(2)
|
||||
segment_start = index
|
||||
next
|
||||
end
|
||||
|
||||
case c
|
||||
when "\\"
|
||||
advance_escape
|
||||
when "'"
|
||||
skip_single_quoted
|
||||
when "\""
|
||||
skip_double_quoted
|
||||
when "`"
|
||||
skip_backtick
|
||||
when "$"
|
||||
if peek(1) == "("
|
||||
if peek(2) == "("
|
||||
skip_arithmetic_substitution
|
||||
else
|
||||
skip_command_substitution
|
||||
end
|
||||
else
|
||||
advance
|
||||
end
|
||||
else
|
||||
advance
|
||||
end
|
||||
end
|
||||
|
||||
tokens << Token.new(type: :text, value: @line[segment_start...index])
|
||||
tokens
|
||||
end
|
||||
|
||||
# Reads the body of a command substitution. The scanner's index must point at
# the first character after the opening "$(".
#
# Escapes, quoted strings, backticks and arithmetic substitutions are copied
# through whole, so parentheses inside them never affect nesting. Nested
# "$(" and plain "(" both open a level that a later ")" closes.
#
# Returns a two-element array: the raw body text (without the closing ")")
# and the index of the first character after that closing ")".
# Raises ArgumentError ("Unmatched $(...)") when the line ends before the
# substitution is closed.
def read_dollar_paren_body
  output = +""
  depth = 1 # the caller has already consumed the opening "$("

  until eof?
    c = current_char

    case c
    when "\\"
      output << read_escape
    when "'"
      output << read_single_quoted
    when "\""
      output << read_double_quoted
    when "`"
      output << read_backtick
    when "$"
      if peek(1) != "("
        # A lone "$" (e.g. a variable reference) is ordinary text.
        output << c
        advance
      elsif peek(2) == "("
        output << read_arithmetic_substitution
      else
        output << "$("
        advance(2)
        depth += 1
      end
    when "("
      output << c
      depth += 1
      advance
    when ")"
      depth -= 1
      # The scanner stays on the closing ")"; only the returned index moves
      # past it.
      return [output, index + 1] if depth.zero?

      output << c
      advance
    else
      output << c
      advance
    end
  end

  raise ArgumentError, "Unmatched $(...)"
end
|
||||
|
||||
private
|
||||
|
||||
# True once the scan position has reached or passed the end of the line.
def eof?
  index >= @line.length
end
|
||||
|
||||
# The character at the scan position, or nil when the position is at or past
# the end of the line.
def current_char
  @line[index]
end
|
||||
|
||||
# Looks ahead without moving: returns the character +offset+ positions after
# the scan position, or nil when that position is past the end of the line.
def peek(offset)
  @line[index + offset]
end
|
||||
|
||||
# Moves the scan position forward by +count+ characters (one by default) and
# returns the new position. No bounds check is made; callers test eof?.
def advance(count = 1)
  @index += count
end
|
||||
|
||||
# Skips a backslash escape: the backslash itself plus the character it
# escapes. A trailing backslash at the end of the line is skipped on its own.
def advance_escape
  advance # the backslash
  advance unless eof? # the escaped character, when there is one
end
|
||||
|
||||
# Skips a single-quoted string; the index must be on the opening quote.
#
# Nothing is special inside single quotes (no escapes, no substitutions), so
# the string simply ends at the next "'". The index is left just past it.
#
# Raises ArgumentError ("Unmatched quote") when there is no closing quote.
def skip_single_quoted
  closing = @line.index("'", index + 1)
  raise ArgumentError, "Unmatched quote" unless closing

  @index = closing + 1
end
|
||||
|
||||
# Skips a double-quoted string; the index must be on the opening quote.
#
# Backslash escapes, backticks, "$(...)" and "$((...))" are skipped as whole
# units, so a '"' inside any of them does not end the string. The index is
# left just past the closing quote.
#
# Raises ArgumentError ("Unmatched quote") when the line ends first.
def skip_double_quoted
  advance # opening quote

  until eof?
    case current_char
    when "\\"
      advance_escape
    when "\""
      advance # closing quote
      return
    when "`"
      skip_backtick
    when "$"
      if peek(1) != "("
        advance # plain "$", e.g. a variable reference
      elsif peek(2) == "("
        skip_arithmetic_substitution
      else
        skip_command_substitution
      end
    else
      advance
    end
  end

  raise ArgumentError, "Unmatched quote"
end
|
||||
|
||||
# Skips a backtick command substitution; the index must be on the opening
# backtick.
#
# Backslash escapes, "$(...)" and "$((...))" are skipped as whole units, so
# a backtick inside any of them does not end the substitution. The index is
# left just past the closing backtick.
#
# Raises ArgumentError ("Unmatched backtick") when the line ends first.
def skip_backtick
  advance # opening backtick

  until eof?
    case current_char
    when "\\"
      advance_escape
    when "`"
      advance # closing backtick
      return
    when "$"
      if peek(1) != "("
        advance # plain "$", e.g. a variable reference
      elsif peek(2) == "("
        skip_arithmetic_substitution
      else
        skip_command_substitution
      end
    else
      advance
    end
  end

  raise ArgumentError, "Unmatched backtick"
end
|
||||
|
||||
# Skips a "$(...)" command substitution; the index must be on the "$".
#
# Nested "$(" and plain "(" each open a level that a later ")" closes.
# Escapes, quoted strings, backticks and "$((...))" are skipped as whole
# units, so parentheses inside them are ignored. The index is left just past
# the closing ")".
#
# Raises ArgumentError ("Unmatched $(...)") when the line ends first.
def skip_command_substitution
  advance(2) # consume "$("
  depth = 1

  until eof?
    case current_char
    when "\\"
      advance_escape
    when "'"
      skip_single_quoted
    when "\""
      skip_double_quoted
    when "`"
      skip_backtick
    when "$"
      if peek(1) != "("
        advance # plain "$", e.g. a variable reference
      elsif peek(2) == "("
        skip_arithmetic_substitution
      else
        advance(2) # nested "$("
        depth += 1
      end
    when "("
      advance
      depth += 1
    when ")"
      advance
      depth -= 1
      return if depth.zero?
    else
      advance
    end
  end

  raise ArgumentError, "Unmatched $(...)"
end
|
||||
|
||||
# Skips a "$((...))" arithmetic substitution; the index must be on the "$".
#
# Grouping parentheses inside the expression are balanced separately, so the
# "))" that ends a group such as the one in "$(( (1+(2)) && 1 ))" is not
# mistaken for the end of the substitution. Nested "$((...))" and "$(...)"
# are skipped recursively; escapes, quoted strings and backticks are skipped
# as whole units. The index is left just past the closing "))".
#
# Raises ArgumentError ("Unmatched $((...))") when the line ends first.
def skip_arithmetic_substitution
  advance(3) # consume "$(("
  open_parens = 0 # grouping parentheses currently open in this expression

  until eof?
    case current_char
    when "\\"
      advance_escape
    when "'"
      skip_single_quoted
    when "\""
      skip_double_quoted
    when "`"
      skip_backtick
    when "$"
      if peek(1) != "("
        advance # plain "$", e.g. a variable reference
      elsif peek(2) == "("
        skip_arithmetic_substitution
      else
        skip_command_substitution
      end
    when "("
      open_parens += 1
      advance
    when ")"
      if open_parens.positive?
        # Closes a grouping parenthesis, never the substitution itself.
        open_parens -= 1
        advance
      elsif peek(1) == ")"
        advance(2) # the closing "))"
        return
      else
        advance
      end
    else
      advance
    end
  end

  raise ArgumentError, "Unmatched $((...))"
end
|
||||
|
||||
# Consumes a backslash escape and returns its source text (the backslash
# plus the escaped character, or just the backslash at the end of the line).
def read_escape
  start = index
  advance_escape
  @line[start...index]
end
|
||||
|
||||
# Consumes a single-quoted string and returns its source text, quotes
# included. The index must be on the opening quote.
def read_single_quoted
  start = index
  skip_single_quoted
  @line[start...index]
end
|
||||
|
||||
# Consumes a double-quoted string and returns its source text, quotes
# included. The index must be on the opening quote.
def read_double_quoted
  start = index
  skip_double_quoted
  @line[start...index]
end
|
||||
|
||||
# Consumes a backtick command substitution and returns its source text,
# backticks included. The index must be on the opening backtick.
def read_backtick
  start = index
  skip_backtick
  @line[start...index]
end
|
||||
|
||||
# Consumes a "$((...))" arithmetic substitution and returns its source text,
# delimiters included. The index must be on the "$".
def read_arithmetic_substitution
  start = index
  skip_arithmetic_substitution
  @line[start...index]
end
|
||||
end
|
||||
|
||||
class << self
  # Splits +line+ into the commands of a command list.
  #
  # The line is tokenized by Scanner#tokenize_command_list. Every :text token
  # becomes one entry, and the :separator token before it decides that
  # entry's +op+.
  #
  # Returns an array of hashes of the form {command: String, op: Symbol},
  # where +op+ is :and for a command preceded by "&&" and :always otherwise.
  # Raises ArgumentError when "&&" has no command before or after it, or when
  # a token of an unknown type is encountered.
  def split_commands(line)
    commands = []
    next_op = :always
    tokens = Scanner.new(line).tokenize_command_list

    tokens.each do |token|
      case token.type
      when :text
        commands << {command: token.value, op: next_op}
        if next_op == :and && token.value.strip.empty?
          raise ArgumentError, "syntax error: expected command after `&&`"
        end
        # An operator applies to exactly one following command.
        next_op = :always
      when :separator
        if token.value == :and
          # "&&" needs a non-blank command on its left-hand side.
          if commands.empty? || commands.last[:command].strip.empty?
            raise ArgumentError, "syntax error near unexpected token `&&`"
          end
          next_op = :and
        else
          next_op = :always
        end
      else
        raise ArgumentError, "Unknown token type: #{token.type}"
      end
    end

    commands
  end

  # Reads a command substitution body from +line+, where +start_index+ is the
  # index of the first character after "$(". Returns whatever
  # Scanner#read_dollar_paren_body returns for that position.
  def read_dollar_paren(line, start_index)
    Scanner.new(line, index: start_index).read_dollar_paren_body
  end
end
|
||||
end
|
||||
end
|
||||
|
|
|
|||
|
|
@ -1,4 +1,3 @@
|
|||
require "shellwords"
|
||||
require "open3"
|
||||
require "shell/quote_cursor"
|
||||
require "shell/string_parser"
|
||||
|
|
@ -54,12 +53,12 @@ module Shell
|
|||
end
|
||||
end
|
||||
|
||||
# Lifted directly from Ruby 4.0.0.
|
||||
# Adapted from Ruby's Shellwords splitting logic.
|
||||
#
|
||||
# Splits a string into an array of tokens in the same way the UNIX
|
||||
# Bourne shell does.
|
||||
#
|
||||
# argv = Shellwords.split('here are "two words"')
|
||||
# argv = shellsplit('here are "two words"')
|
||||
# argv #=> ["here", "are", "two words"]
|
||||
#
|
||||
# +line+ must not contain NUL characters because of nature of
|
||||
|
|
@ -69,7 +68,7 @@ module Shell
|
|||
# metacharacters except for the single and double quotes and
|
||||
# backslash are not treated as such.
|
||||
#
|
||||
# argv = Shellwords.split('ruby my_prog.rb | less')
|
||||
# argv = shellsplit('ruby my_prog.rb | less')
|
||||
# argv #=> ["ruby", "my_prog.rb", "|", "less"]
|
||||
#
|
||||
# String#shellsplit is a shortcut for this function.
|
||||
|
|
|
|||
|
|
@ -93,6 +93,16 @@ class ShellTest < Minitest::Test
|
|||
assert_equal "hi", `#{A1_PATH} -c 'echo $(echo hi)'`.chomp
|
||||
end
|
||||
|
||||
# ";" and "&&" inside $(...) belong to the substituted command; they must not
# split the outer command list. Runs the shell binary once per operator and
# checks both the exit status and the output.
def test_keeps_control_operators_inside_command_substitution
  semicolon_stdout, semicolon_stderr, semicolon_status = Open3.capture3(A1_PATH, "-c", "echo $(echo hi; echo bye)")
  # stderr is passed as the failure message so a failing run explains itself.
  assert semicolon_status.success?, semicolon_stderr
  assert_equal "hi bye\n", semicolon_stdout

  and_stdout, and_stderr, and_status = Open3.capture3(A1_PATH, "-c", "echo $(echo hi && echo bye)")
  assert and_status.success?, and_stderr
  assert_equal "hi bye\n", and_stdout
end
|
||||
|
||||
# An escaped double quote inside a double-quoted argument within $(...) must
# reach the substituted command intact.
def test_expands_command_substitution_with_escaped_quote
  assert_equal "a\"b", `#{A1_PATH} -c 'echo $(printf \"%s\" \"a\\\"b\")'`.chomp
end
|
||||
|
|
|
|||
Loading…
Reference in a new issue