require "cgi"
module Pressa
module Utils
class GemtextRenderer
class << self
# Convert a Markdown document into Gemtext.
#
# Fenced code blocks (```) are passed through verbatim, link-reference
# definition lines are consumed (they only feed the reference table),
# and runs of blank lines are collapsed to one.
def render(markdown)
  source_lines = markdown.to_s.gsub("\r\n", "\n").split("\n")
  reference_definitions = extract_link_reference_definitions(source_lines)
  inside_fence = false
  rendered = source_lines.each_with_object([]) do |source_line, acc|
    if source_line.start_with?("```")
      acc << "```"
      inside_fence = !inside_fence
    elsif inside_fence
      acc << source_line
    elsif !link_reference_definition?(source_line)
      acc.concat(convert_line(source_line, reference_definitions))
    end
  end
  squish_blank_lines(rendered).join("\n").strip
end
private
# Dispatch a single non-preformatted line to the matching converter.
# Note: text lines receive the original (unstripped) line.
def convert_line(line, link_reference_definitions)
  trimmed = line.strip
  case
  when trimmed.empty? then [""]
  when heading_line?(trimmed) then convert_heading(trimmed, link_reference_definitions)
  when list_item_line?(trimmed) then convert_list_item(trimmed, link_reference_definitions)
  when quote_line?(trimmed) then convert_quote_line(trimmed, link_reference_definitions)
  else convert_text_line(line, link_reference_definitions)
  end
end
# Render a heading line, keeping the "#"/"##"/"###" marker and
# appending one "=>" row per link found in the heading text.
def convert_heading(line, link_reference_definitions)
  marker, rest = line.split(/\s+/, 2)
  heading_text, links = extract_links(rest.to_s, link_reference_definitions)
  ["#{marker} #{clean_inline_text(heading_text)}".strip] + render_link_rows(links)
end
# Render a bullet list item. Items that are nothing but a link
# collapse into bare "=>" rows with no "*" bullet.
def convert_list_item(line, link_reference_definitions)
  item_text = line.sub(/\A[-*+]\s+/, "")
  clean_text, links = extract_links(item_text, link_reference_definitions)
  link_rows = render_link_rows(links)
  return link_rows if link_only_list_item?(item_text, link_reference_definitions)
  ["* #{clean_inline_text(clean_text)}".strip] + link_rows
end
# Render a blockquote line as a Gemtext "> " quote, with any links
# pulled out into "=>" rows below it.
def convert_quote_line(line, link_reference_definitions)
  quoted = line.sub(/\A>\s?/, "")
  body, links = extract_links(quoted, link_reference_definitions)
  ["> #{clean_inline_text(body)}".strip] + render_link_rows(links)
end
# Render a plain paragraph line. A line consisting only of links
# becomes bare "=>" rows; otherwise the cleaned text comes first
# and its links follow. A line that renders to nothing yields [""].
def convert_text_line(line, link_reference_definitions)
  clean_text, links = extract_links(line, link_reference_definitions)
  link_rows = render_link_rows(links)
  if links.any? && clean_inline_text(strip_links_from_text(line)).empty?
    return link_rows
  end
  body = clean_inline_text(clean_text)
  rows = body.empty? ? link_rows : [body] + link_rows
  rows.empty? ? [""] : rows
end
# Collect every link in +text+ as [url, label] pairs and return
# [text_with_links_replaced_by_their_labels, links].
#
# Handles, in order: HTML anchors, inline Markdown links, reference
# Markdown links (resolved via +link_reference_definitions+), and
# finally any remaining bare href=/src= attributes (with a label
# derived from the URL).
def extract_links(text, link_reference_definitions)
  links = []
  work = text.dup
  # HTML anchors: <a ... href="url" ...>label</a>.
  # Bug fix: the pattern previously lacked the "<a" opener and the
  # closing "</a>", so anchors were never matched as a unit.
  work.gsub!(%r{<a\b[^>]*href=["']([^"']+)["'][^>]*>(.*?)</a>}i) do
    url = Regexp.last_match(1)
    label = clean_inline_text(strip_html_tags(Regexp.last_match(2)))
    links << [url, label]
    label
  end
  # Inline Markdown links: [label](url "optional title").
  work.gsub!(/\[([^\]]+)\]\(([^)\s]+)(?:\s+"[^"]*")?\)/) do
    label = clean_inline_text(Regexp.last_match(1))
    url = Regexp.last_match(2)
    links << [url, label]
    label
  end
  # Reference Markdown links: [label][key]; [label][] uses the label as key.
  work.gsub!(/\[([^\]]+)\]\[([^\]]*)\]/) do
    label_text = Regexp.last_match(1)
    reference_key = Regexp.last_match(2)
    reference_key = label_text if reference_key.strip.empty?
    url = resolve_link_reference(link_reference_definitions, reference_key)
    # Unknown reference: leave the construct untouched.
    next Regexp.last_match(0) unless url
    label = clean_inline_text(label_text)
    links << [url, label]
    label
  end
  # Any leftover href=/src= attributes (e.g. from <img>), deduplicated by URL.
  work.scan(/(?:href|src)=["']([^"']+)["']/i) do |match|
    url = match.first
    next if links.any? { |(existing_url, _)| existing_url == url }
    links << [url, fallback_label(url)]
  end
  [work, links]
end
# Look up a reference-style link target by its normalized key;
# returns nil when the reference is undefined.
def resolve_link_reference(link_reference_definitions, key)
  link_reference_definitions.fetch(normalize_link_reference_key(key), nil)
end
# True when a list item is effectively just a link: either nothing
# remains after stripping its links, or a single link is prefixed only
# by a short "Label:" style lead-in.
def link_only_list_item?(text, link_reference_definitions)
  _text, links = extract_links(text, link_reference_definitions)
  return false if links.empty?
  leftover = clean_inline_text(strip_links_from_text(text))
  leftover.empty? || (links.length == 1 && leftover.match?(/\A[\w@.+\-\/ ]+:\z/))
end
# Build a {normalized_key => url} table from "[key]: url" definition
# lines (up to 3 leading spaces allowed; "<url>" brackets are removed).
def extract_link_reference_definitions(lines)
  lines.each_with_object({}) do |line, definitions|
    match = line.match(/\A\s{0,3}\[([^\]]+)\]:\s*(\S+)/)
    next unless match
    url = match[2]
    url = url[1..-2] if url.start_with?("<") && url.end_with?(">")
    definitions[normalize_link_reference_key(match[1])] = url
  end
end
# Normalize a reference key: trim, collapse internal whitespace to a
# single space, and lowercase (reference labels are case-insensitive).
def normalize_link_reference_key(key)
  key.to_s.gsub(/\s+/, " ").strip.downcase
end
# Remove every link construct entirely (including labels), leaving only
# the surrounding prose. Used to detect link-only lines/items.
def strip_links_from_text(text)
  work = text.dup
  # HTML anchors with their inner label.
  # Bug fix: the pattern previously lacked the "<a" opener and the
  # closing "</a>", so anchors were never removed.
  work.gsub!(%r{<a\b[^>]*href=["'][^"']+["'][^>]*>.*?</a>}i, "")
  # Inline Markdown links: [label](url "optional title").
  work.gsub!(/\[([^\]]+)\]\(([^)\s]+)(?:\s+"[^"]*")?\)/, "")
  # Reference Markdown links: [label][key].
  work.gsub!(/\[([^\]]+)\]\[([^\]]*)\]/, "")
  work
end
# Render collected [url, label] pairs as Gemtext link lines.
# Gemtext format: "=> URL optional-label".
# Bug fix: the label was computed by every caller (see fallback_label
# and the label extraction in extract_links) but silently discarded
# here; it is now emitted after the URL when non-blank.
# Entries without a usable URL are dropped.
def render_link_rows(links)
  links.filter_map do |url, label|
    next nil if url.nil? || url.strip.empty?
    label_text = label.to_s.strip
    label_text.empty? ? "=> #{url}" : "=> #{url} #{label_text}"
  end
end
# Flatten inline formatting to plain text: drop HTML tags, unwrap
# code/bold/italic markers, collapse whitespace, then decode HTML
# entities (standard ones via CGI, plus a few named extras).
def clean_inline_text(text)
  cleaned = strip_html_tags(text.to_s.dup)
  [
    [/`([^`]+)`/, '\1'],        # `code`
    [/\*\*([^*]+)\*\*/, '\1'],  # **bold**
    [/__([^_]+)__/, '\1'],      # __bold__
    [/\*([^*]+)\*/, '\1'],      # *italic*
    [/_([^_]+)_/, '\1'],        # _italic_
    [/\s+/, " "]                # collapse whitespace runs
  ].each { |pattern, replacement| cleaned.gsub!(pattern, replacement) }
  decode_named_html_entities(CGI.unescapeHTML(cleaned)).strip
end
# Decode a small set of named HTML entities (case-insensitive) that
# CGI.unescapeHTML does not handle. Unrecognized entities pass
# through unchanged.
def decode_named_html_entities(text)
  text.gsub(/&([A-Za-z]+);/) do
    case Regexp.last_match(1).downcase
    when "darr" then "\u2193"
    when "uarr" then "\u2191"
    when "larr" then "\u2190"
    when "rarr" then "\u2192"
    when "hellip" then "..."
    when "nbsp" then " "
    else
      # Bug fix: the interpolation was broken ("{Regexp.last_match(1)};",
      # missing the leading "&" and the "#"), mangling unknown entities
      # into literal "{Regexp.last_match(1)};" text. Re-emit the entity.
      "&#{Regexp.last_match(1)};"
    end
  end
end
# Remove every "<...>" tag run, keeping only the text between tags.
def strip_html_tags(text)
  text.split(/<[^>]+>/).join
end
# Derive a readable label from a URL's final path segment (query
# string removed); fall back to the whole URL when no usable segment
# exists.
def fallback_label(url)
  name = File.basename(url.split("?").first.to_s)
  return url if name.empty? || name == "/"
  name
end
# A heading is 1-3 "#" characters followed by whitespace.
def heading_line?(line)
  /\A[#]{1,3}\s+/.match?(line)
end
# A list item starts with "-", "*", or "+" followed by whitespace.
def list_item_line?(line)
  /\A[-*+]\s+/.match?(line)
end
# A blockquote line starts with ">".
def quote_line?(line)
  line[0] == ">"
end
# A link-reference definition: "[key]: target", indented at most
# 3 spaces, with a non-empty target.
def link_reference_definition?(line)
  /\A\s{0,3}\[[^\]]+\]:\s+\S/.match?(line)
end
# Collapse consecutive blank (whitespace-only) lines, keeping the
# first blank line of each run.
def squish_blank_lines(lines)
  lines.each_with_object([]) do |line, kept|
    next if line.strip.empty? && kept.last&.strip&.empty?
    kept << line
  end
end
end
end
end
end