mirror of
https://github.com/samsonjs/samhuri.net.git
synced 2026-03-25 09:05:47 +00:00
* Publish on gemini in addition to the web * Publish gemini feeds, add link from web, tweak things
257 lines
8 KiB
Ruby
257 lines
8 KiB
Ruby
require "cgi"
|
|
|
|
module Pressa
  module Utils
    # Converts a Markdown document (optionally containing inline HTML links)
    # into gemtext.
    #
    # The conversion is line-oriented: every input line becomes zero or more
    # output rows. Inline links cannot be expressed inside gemtext text, so the
    # link label stays in the text row and each link target is emitted on its
    # own "=> url" row directly below it. Fenced code blocks are passed through
    # verbatim between "```" toggle rows.
    class GemtextRenderer
      # Toggle row that opens/closes a preformatted block (input and output).
      FENCE_MARKER = "```".freeze

      # Single source of truth for "[label]: destination" lines, shared by the
      # extraction pass and the skip-on-output check so they cannot disagree.
      LINK_REFERENCE_DEFINITION = /\A\s{0,3}\[([^\]]+)\]:\s*(\S+)/

      # <a href="url">label</a>
      HTML_ANCHOR = %r{<a\s+[^>]*href=["']([^"']+)["'][^>]*>(.*?)</a>}i

      # [label](url) and [label](url "title")
      INLINE_LINK = /\[([^\]]+)\]\(([^)\s]+)(?:\s+"[^"]*")?\)/

      # [label][key] and the collapsed form [label][]
      REFERENCE_LINK = /\[([^\]]+)\]\[([^\]]*)\]/

      # Any remaining href="..." / src="..." attribute (e.g. on <img>).
      URL_ATTRIBUTE = /(?:href|src)=["']([^"']+)["']/i

      # _emphasis_ that is not glued to a letter or digit on either side, so
      # identifiers such as snake_case_name are left intact.
      UNDERSCORE_EMPHASIS = /(?<![[:alnum:]])_([^_]+)_(?![[:alnum:]])/

      # Named entities CGI.unescapeHTML does not decode, with replacements.
      NAMED_ENTITIES = {
        "darr" => "\u2193",
        "uarr" => "\u2191",
        "larr" => "\u2190",
        "rarr" => "\u2192",
        "hellip" => "...",
        "nbsp" => " "
      }.freeze

      private_constant :FENCE_MARKER, :LINK_REFERENCE_DEFINITION, :HTML_ANCHOR,
        :INLINE_LINK, :REFERENCE_LINK, :URL_ATTRIBUTE, :UNDERSCORE_EMPHASIS,
        :NAMED_ENTITIES

      class << self
        # Renders +markdown+ as a gemtext string.
        #
        # @param markdown [#to_s] Markdown source; nil renders as "".
        # @return [String] gemtext with runs of blank lines collapsed (outside
        #   preformatted blocks) and surrounding whitespace stripped.
        def render(markdown)
          lines = markdown.to_s.gsub("\r\n", "\n").split("\n")
          link_reference_definitions = extract_link_reference_definitions(lines)
          output_lines = []
          in_preformatted_block = false

          lines.each do |line|
            if fence_line?(line)
              # Any info string after the fence is dropped; only the toggle is kept.
              output_lines << FENCE_MARKER
              in_preformatted_block = !in_preformatted_block
            elsif in_preformatted_block
              output_lines << line
            elsif !link_reference_definition?(line)
              output_lines.concat(convert_line(line, link_reference_definitions))
            end
          end

          squish_blank_lines(output_lines).join("\n").strip
        end

        private

        # Dispatches one non-preformatted line to the matching converter.
        # Returns an array of output rows.
        def convert_line(line, link_reference_definitions)
          stripped = line.strip
          return [""] if stripped.empty?

          if heading_line?(stripped)
            convert_heading(stripped, link_reference_definitions)
          elsif list_item_line?(stripped)
            convert_list_item(stripped, link_reference_definitions)
          elsif quote_line?(stripped)
            convert_quote_line(stripped, link_reference_definitions)
          else
            convert_text_line(line, link_reference_definitions)
          end
        end

        # "# Heading" -> heading row (same marker) followed by its link rows.
        def convert_heading(line, link_reference_definitions)
          marker, text = line.split(/\s+/, 2)
          render_prefixed_rows(marker, text.to_s, link_reference_definitions)
        end

        # "- item" -> "* item" plus link rows. An item that is nothing but a
        # link is rendered as the link row(s) only.
        def convert_list_item(line, link_reference_definitions)
          text = line.sub(/\A[-*+]\s+/, "")
          if link_only_list_item?(text, link_reference_definitions)
            _clean_text, links = extract_links(text, link_reference_definitions)
            return render_link_rows(links)
          end

          render_prefixed_rows("*", text, link_reference_definitions)
        end

        # "> quote" -> "> quote" plus link rows.
        def convert_quote_line(line, link_reference_definitions)
          text = line.sub(/\A>\s?/, "")
          render_prefixed_rows(">", text, link_reference_definitions)
        end

        # Shared row builder for headings, list items and quotes: one text row
        # starting with +prefix+, then one row per extracted link.
        def render_prefixed_rows(prefix, text, link_reference_definitions)
          clean_text, links = extract_links(text, link_reference_definitions)
          ["#{prefix} #{clean_inline_text(clean_text)}".strip, *render_link_rows(links)]
        end

        # Plain paragraph line. A line consisting only of links yields just
        # the link rows; otherwise the cleaned text row comes first.
        def convert_text_line(line, link_reference_definitions)
          clean_text, links = extract_links(line, link_reference_definitions)
          link_rows = render_link_rows(links)
          return link_rows if !links.empty? && clean_inline_text(strip_links_from_text(line)).empty?

          inline_text = clean_inline_text(clean_text)
          rows = inline_text.empty? ? link_rows : [inline_text, *link_rows]
          rows.empty? ? [""] : rows
        end

        # Replaces every link in +text+ with its label and collects targets.
        #
        # @return [Array(String, Array<Array(String, String)>)] the rewritten
        #   text and a list of [url, label] pairs in order of discovery.
        def extract_links(text, link_reference_definitions)
          links = []

          work = text.gsub(HTML_ANCHOR) do
            match = Regexp.last_match
            label = clean_inline_text(strip_html_tags(match[2]))
            links << [match[1], label]
            label
          end

          work = work.gsub(INLINE_LINK) do
            match = Regexp.last_match
            label = clean_inline_text(match[1])
            links << [match[2], label]
            label
          end

          work = work.gsub(REFERENCE_LINK) do
            match = Regexp.last_match
            label_text = match[1]
            # "[label][]" uses the label itself as the reference key.
            reference_key = match[2].strip.empty? ? label_text : match[2]
            url = resolve_link_reference(link_reference_definitions, reference_key)
            # Unknown reference: leave the original text untouched.
            next match[0] unless url

            label = clean_inline_text(label_text)
            links << [url, label]
            label
          end

          # Pick up URLs on tags that were not rewritten above (e.g. <img src>).
          work.scan(URL_ATTRIBUTE).flatten.each do |url|
            next if links.any? { |existing_url, _| existing_url == url }

            links << [url, fallback_label(url)]
          end

          [work, links]
        end

        # Looks up the URL for a reference key; nil when undefined.
        def resolve_link_reference(link_reference_definitions, key)
          link_reference_definitions[normalize_link_reference_key(key)]
        end

        # True when a list item is just links, or a single link introduced by
        # a short "Name:" style prefix.
        def link_only_list_item?(text, link_reference_definitions)
          _clean_text, links = extract_links(text, link_reference_definitions)
          return false if links.empty?

          normalized_remaining = clean_inline_text(strip_links_from_text(text))
          return true if normalized_remaining.empty?

          links.length == 1 && normalized_remaining.match?(/\A[\w@.+\-\/ ]+:\z/)
        end

        # Collects "[key]: url" definitions into a { normalized_key => url }
        # hash. Lines inside fenced code blocks are ignored so that sample
        # text in a code block cannot define real link targets.
        def extract_link_reference_definitions(lines)
          in_preformatted_block = false

          lines.each_with_object({}) do |line, definitions|
            if fence_line?(line)
              in_preformatted_block = !in_preformatted_block
              next
            end
            next if in_preformatted_block

            match = line.match(LINK_REFERENCE_DEFINITION)
            next unless match

            destination = match[2]
            # "<url>" form: drop the angle brackets.
            destination = destination[1..-2] if destination.start_with?("<") && destination.end_with?(">")
            definitions[normalize_link_reference_key(match[1])] = destination
          end
        end

        # Reference keys compare case-insensitively with whitespace collapsed.
        def normalize_link_reference_key(key)
          key.to_s.strip.downcase.gsub(/\s+/, " ")
        end

        # Removes every link construct (label included) from +text+, leaving
        # only the surrounding prose.
        def strip_links_from_text(text)
          text.gsub(HTML_ANCHOR, "").gsub(INLINE_LINK, "").gsub(REFERENCE_LINK, "")
        end

        # Builds one "=> url" row per link, skipping blank URLs.
        # NOTE(review): the label of each pair is collected but not emitted;
        # output is URL-only. Kept as-is - confirm this is intentional.
        def render_link_rows(links)
          links.filter_map do |url, _label|
            "=> #{url}" unless url.nil? || url.strip.empty?
          end
        end

        # Strips HTML tags and Markdown inline markers (code spans, bold,
        # emphasis), decodes entities and collapses whitespace.
        def clean_inline_text(text)
          cleaned = strip_html_tags(text.to_s)
          cleaned = cleaned.gsub(/`([^`]+)`/, '\1')
          cleaned = cleaned.gsub(/\*\*([^*]+)\*\*/, '\1')
          cleaned = cleaned.gsub(/__([^_]+)__/, '\1')
          cleaned = cleaned.gsub(/\*([^*]+)\*/, '\1')
          cleaned = cleaned.gsub(UNDERSCORE_EMPHASIS, '\1')
          # Named entities first, then a single CGI pass: decoding in the
          # opposite order would turn an escaped "&amp;rarr;" into an arrow
          # instead of the literal text "&rarr;".
          cleaned = decode_named_html_entities(cleaned)
          cleaned = CGI.unescapeHTML(cleaned)
          # Collapse last so whitespace produced by decoding is squeezed too.
          cleaned.gsub(/\s+/, " ").strip
        end

        # Decodes the entities listed in NAMED_ENTITIES (case-insensitively);
        # any other "&name;" sequence is returned unchanged.
        def decode_named_html_entities(text)
          text.gsub(/&([A-Za-z]+);/) do |entity|
            NAMED_ENTITIES.fetch(Regexp.last_match(1).downcase, entity)
          end
        end

        # Removes anything that looks like an HTML tag.
        def strip_html_tags(text)
          text.gsub(/<[^>]+>/, "")
        end

        # Label for a bare URL: the last path segment (query string removed),
        # or the URL itself when there is no usable segment.
        def fallback_label(url)
          basename = File.basename(url.split("?").first.to_s)
          return url if basename.empty? || basename == "/"

          basename
        end

        # One to three "#" followed by whitespace.
        def heading_line?(line)
          line.match?(/\A\#{1,3}\s+/)
        end

        # "-", "*" or "+" followed by whitespace.
        def list_item_line?(line)
          line.match?(/\A[-*+]\s+/)
        end

        def quote_line?(line)
          line.start_with?(">")
        end

        # True for "[key]: destination" lines, which never appear in output.
        def link_reference_definition?(line)
          line.match?(LINK_REFERENCE_DEFINITION)
        end

        # True for a line that opens or closes a preformatted block.
        def fence_line?(line)
          line.start_with?(FENCE_MARKER)
        end

        # Collapses runs of blank rows into a single blank row. Rows between
        # fence toggles are preformatted and are kept exactly as they are.
        def squish_blank_lines(lines)
          in_preformatted_block = false
          previous_blank = false

          lines.each_with_object([]) do |line, output|
            in_preformatted_block = !in_preformatted_block if fence_line?(line)

            blank = !in_preformatted_block && line.strip.empty?
            next if blank && previous_blank

            output << line
            previous_blank = blank
          end
        end
      end
    end
  end
end
|