add a class that knows how to manage this harp blog: HarpBlog

This commit is contained in:
Sami Samhuri 2014-10-18 01:37:39 -07:00
parent cf4b6e7a0a
commit 5c6399b558
11 changed files with 839 additions and 20 deletions

2
.gitignore vendored
View file

@ -3,4 +3,4 @@ node_modules
public/feed.xml
www
server/auth.json
server/spec/test-blog*

View file

@ -1 +1,2 @@
exclude = "{$exclude,www,node_modules,tweets,wayback}"
exclude = "{$exclude,www,node_modules,tweets,wayback,test-blog*}"
include = "{$include,.gitignore}"

View file

@ -1,8 +1,7 @@
source 'https://rubygems.org'
gem 'builder'
gem 'json'
gem 'mustache'
gem 'htmlentities'
gem 'rdiscount'
gem 'nokogiri'
gem 'css_parser'
gem 'rspec'
gem 'guard-rspec'

View file

@ -1,24 +1,61 @@
GEM
remote: https://rubygems.org/
specs:
addressable (2.3.5)
builder (3.0.0)
css_parser (1.3.5)
addressable
json (1.6.1)
mini_portile (0.5.2)
mustache (0.99.4)
nokogiri (1.6.1)
mini_portile (~> 0.5.0)
builder (3.2.2)
celluloid (0.16.0)
timers (~> 4.0.0)
coderay (1.1.0)
diff-lcs (1.2.5)
ffi (1.9.3)
formatador (0.2.5)
guard (2.6.1)
formatador (>= 0.2.4)
listen (~> 2.7)
lumberjack (~> 1.0)
pry (>= 0.9.12)
thor (>= 0.18.1)
guard-rspec (4.3.1)
guard (~> 2.1)
rspec (>= 2.14, < 4.0)
hitimes (1.2.2)
htmlentities (4.3.2)
listen (2.7.9)
celluloid (>= 0.15.2)
rb-fsevent (>= 0.9.3)
rb-inotify (>= 0.9)
lumberjack (1.0.9)
method_source (0.8.2)
pry (0.10.1)
coderay (~> 1.1.0)
method_source (~> 0.8.1)
slop (~> 3.4)
rb-fsevent (0.9.4)
rb-inotify (0.9.5)
ffi (>= 0.5.0)
rdiscount (1.6.8)
rspec (3.0.0)
rspec-core (~> 3.0.0)
rspec-expectations (~> 3.0.0)
rspec-mocks (~> 3.0.0)
rspec-core (3.0.4)
rspec-support (~> 3.0.0)
rspec-expectations (3.0.4)
diff-lcs (>= 1.2.0, < 2.0)
rspec-support (~> 3.0.0)
rspec-mocks (3.0.4)
rspec-support (~> 3.0.0)
rspec-support (3.0.4)
slop (3.6.0)
thor (0.19.1)
timers (4.0.1)
hitimes
PLATFORMS
ruby
DEPENDENCIES
builder
css_parser
json
mustache
nokogiri
guard-rspec
htmlentities
rdiscount
rspec

View file

@ -12,4 +12,10 @@ publish_beta: compile
@echo
./bin/publish.sh --beta --delete
.PHONY: compile publish publish_beta
test_blog:
./bin/create-test-blog.sh server/spec/test-blog
spec:
cd server && rspec -f documentation
.PHONY: compile publish publish_beta test_blog spec

16
bin/create-test-blog.sh Executable file
View file

@ -0,0 +1,16 @@
#!/bin/zsh
#
# Create a disposable clone of the blog at the path given as $1.
#
# A bare mirror of the GitHub repository is kept next to it at
# "<path>-origin.git" and is reused on later runs; the test blog is then
# cloned from that local mirror, so its "origin" remote is the mirror.

set -e # bail on errors

# Without a path the mirror would be created as "-origin.git" in the current
# directory and the final clone would fail, so refuse to start.
if [[ -z "$1" ]]; then
  echo "usage: $0 <blog-path>" >&2
  exit 1
fi

BLOG_PATH="$1"
ORIGIN_BLOG_PATH="${BLOG_PATH}-origin.git"

if [[ -e "$BLOG_PATH" ]]; then
  echo ">>> Refusing to clobber $BLOG_PATH"
else
  # Only hit the network when there is no local mirror yet.
  if [[ ! -e "$ORIGIN_BLOG_PATH" ]]; then
    echo ">>> Mirroring local origin..."
    git clone --mirror git@github.com:samsonjs/samhuri.net.git "$ORIGIN_BLOG_PATH"
  fi
  echo ">>> Cloning test blog from local origin..."
  git clone "$ORIGIN_BLOG_PATH" "$BLOG_PATH"
fi

18
server/Guardfile Normal file
View file

@ -0,0 +1,18 @@
# A sample Guardfile
# More info at https://github.com/guard/guard#readme
# Note: The cmd option is now required due to the increasing number of ways
# rspec may be run, below are examples of the most common uses.
# * bundler: 'bundle exec rspec'
# * bundler binstubs: 'bin/rspec'
# * spring: 'bin/rspec' (This will use spring if running and you have
# installed the spring binstubs per the docs)
# * zeus: 'zeus rspec' (requires the server to be started separately)
# * 'just' rspec: 'rspec'
guard :rspec, :cmd => 'bundle exec rspec' do
  # auth.json is mapped to the server spec.
  watch('auth.json') do
    'spec/server_spec.rb'
  end

  # Any Ruby file is mapped to the spec file of the same name under spec/.
  watch(%r{^(.+)\.rb$}) do |match|
    "spec/#{match[1]}_spec.rb"
  end

  # The shared helpers are mapped to the whole spec directory.
  watch('spec/helpers.rb') do
    'spec'
  end

  # Spec files are watched directly.
  watch(%r{^spec/.+_spec\.rb$})
end

369
server/harp_blog.rb Normal file
View file

@ -0,0 +1,369 @@
require 'fileutils'
require 'json'
require './web_title_finder'
class HarpBlog
# Root of the error hierarchy used by HarpBlog.
class HarpBlogError < RuntimeError
end

# Raised by get_post when a post's body file and its index entry disagree.
class InvalidDataError < HarpBlogError
end

# Raised by create_post when a post already exists for the computed slug.
class PostExistsError < HarpBlogError
end

# Raised by save_post when writing, committing or pushing a post fails.
# The underlying exception is available through #original_error.
class PostSaveError < HarpBlogError
  attr_reader :original_error

  # message        - description passed on to RuntimeError
  # original_error - the exception that caused the save to fail
  def initialize(message, original_error)
    @original_error = original_error
    super(message)
  end
end
# A single blog post.
#
# Fields are split into PERSISTENT_FIELDS (returned by #persistent_fields,
# which is what gets stored in the monthly index) and TRANSIENT_FIELDS
# (time, slug and body). Most readers lazily derive a default the first time
# they are called and cache it in the matching instance variable.
class Post
  PERSISTENT_FIELDS = %w[author title date timestamp link url tags].map(&:to_sym).freeze
  TRANSIENT_FIELDS = %w[time slug body].map(&:to_sym).freeze
  FIELDS = (PERSISTENT_FIELDS + TRANSIENT_FIELDS).freeze

  attr_accessor(*FIELDS)

  # fields - optional Hash keyed by field name (String or Symbol keys).
  #          Unknown keys are ignored; nil/false values are left unset so the
  #          lazy defaults still apply.
  def initialize(fields = nil)
    return unless fields

    FIELDS.each do |name|
      value = fields[name.to_s] || fields[name]
      instance_variable_set("@#{name}", value) if value
    end
  end

  # Returns a Hash of the persistent fields, keyed by Symbol.
  def persistent_fields
    PERSISTENT_FIELDS.each_with_object({}) { |name, hash| hash[name] = send(name) }
  end

  # Returns a Hash of every field (persistent and transient), keyed by Symbol.
  def fields
    FIELDS.each_with_object({}) { |name, hash| hash[name] = send(name) }
  end

  # True when the post has a link, false otherwise.
  def link?
    !!link
  end

  # Changing the title invalidates everything derived from it: the cached
  # slug and the cached url (which embeds the slug). Without clearing the url
  # a retitled post would keep pointing at its old slug.
  def title=(title)
    @slug = nil
    @url = nil
    @title = title
  end

  def author
    @author ||= 'Sami Samhuri'
  end

  # Derived from the timestamp when one is set, otherwise the current time.
  def time
    @time ||= @timestamp ? Time.at(@timestamp) : Time.now
  end

  # Seconds since the epoch, derived from #time when not set explicitly.
  def timestamp
    @timestamp ||= time.to_i
  end

  def url
    @url ||= "/posts/#{time.year}/#{padded_month}/#{slug}"
  end

  # URL-friendly form of the title, or nil when there is no title:
  # lowercased, apostrophes removed, every other character that is not a
  # letter, digit or underscore replaced by a dash, leading/trailing dashes
  # stripped and runs of dashes collapsed.
  def slug
    # TODO: be intelligent about unicode ... \p{Word} might help. negated char class with it?
    if title
      @slug ||= title.downcase.
        gsub(/'/, '').
        gsub(/[^[:alpha:]\d_]/, '-').
        gsub(/^-+|-+$/, '').
        gsub(/-+/, '-')
    end
  end

  def date
    @date ||= time.strftime('%B %d, %Y')
  end

  def tags
    @tags ||= []
  end

  # Two-digit month of #time, e.g. "02".
  def padded_month
    pad(time.month)
  end

  # Prefixes values below ten with a zero: pad(9) => "09", pad(10) => "10".
  def pad(n)
    n.to_i < 10 ? "0#{n}" : "#{n}"
  end
end # Post
# path         - root directory of the blog; every path and shell command
#                built by this class is relative to it
# dry_run      - when true (the default) file and shell operations are
#                printed instead of performed
# title_finder - object responding to find_title(url); a WebTitleFinder is
#                created when none is supplied
def initialize(path, dry_run = true, title_finder = nil)
  @path, @dry_run = path, dry_run
  @title_finder = title_finder ? title_finder : WebTitleFinder.new
end
# Returns the sorted names of the entries in the posts directory whose name
# starts with "20".
def years
  year_dirs = Dir.glob(post_path('20*'))
  year_dirs.map { |dir| File.basename(dir) }.sort
end
# Returns the posts of every month ("01" through "12") of the given year,
# concatenated in month order.
def posts_for_year(year)
  (1..12).flat_map do |number|
    posts_for_month(year, format('%02d', number))
  end
end
# Returns a Post for every entry in the month's index, ordered by the
# entry's 'timestamp' value.
def posts_for_month(year, month)
  entries = read_post_data(post_path(year, month)).values
  ordered = entries.sort_by { |fields| fields['timestamp'] }
  ordered.map { |fields| Post.new(fields) }
end
# Loads the post stored under year/month/slug.
#
# Returns a Post (including its body) when both the body file and the index
# entry exist, or nil when neither exists.
#
# Raises InvalidDataError when only one of the two exists. The index entry is
# looked up unconditionally: previously it was only read when the body file
# existed, so an orphaned index entry silently returned nil and the "missing
# post body" branch could never run.
def get_post(year, month, slug)
  post_dir = post_path(year, month)
  post_filename = File.join(post_dir, "#{slug}.md")
  fields = read_post_data(post_dir)[slug]
  body_exists = File.exist?(post_filename)

  if fields && body_exists
    fields[:body] = File.read(post_filename)
    Post.new(fields)
  elsif fields
    message = "missing post body for #{year}/#{month}/#{slug}: #{post_filename}"
    $stderr.puts "[HarpBlog#get_post] #{message}"
    raise InvalidDataError, message
  elsif body_exists
    message = "missing metadata for #{year}/#{month}/#{slug}: #{post_dir}/_data.json"
    $stderr.puts "[HarpBlog#get_post] #{message}"
    raise InvalidDataError, message
  end
end
# Creates, commits and pushes a new post dated now.
#
# title - post title; when nil or blank it is looked up from +link+ through
#         the title finder
# body  - post body text
# link  - URL the post links to, or nil
#
# Returns the saved Post.
# Raises InvalidDataError when no usable title can be determined (this used to
# be a bare RuntimeError; InvalidDataError is a RuntimeError subclass, so
# existing rescuers keep working, and callers rescuing HarpBlogError now see
# it too). Raises PostExistsError when a valid post already exists under the
# computed year/month/slug, and PostSaveError when saving fails.
def create_post(title, body, link)
  title = find_title(link) if title.nil? || title.strip.empty?

  # A blank title would produce an empty slug, so treat it like no title.
  if title.nil? || title.strip.empty?
    raise InvalidDataError, "cannot find title for #{link}"
  end

  fields = {
    title: title,
    link: link,
    body: body,
  }
  post = Post.new(fields)

  year, month, slug = post.time.year, post.padded_month, post.slug
  begin
    existing_post = get_post(year.to_s, month, slug)
  rescue InvalidDataError => e
    # Half-written leftovers are not worth preserving: clear them out and
    # carry on as if the post did not exist.
    $stderr.puts "[HarpBlog#create_post] deleting post with invalid data: #{e.message}"
    delete_post(year.to_s, month, slug)
    existing_post = nil
  end

  if existing_post
    raise PostExistsError.new("post exists: #{year}/#{month}/#{slug}")
  else
    save_post(post)
  end
end
# Replaces the title, body and link of an existing post and saves it.
#
# The current slug is read before the title is assigned so it can be handed
# to save_post as the old slug.
#
# Returns whatever save_post returns.
def update_post(post, title, body, link)
  previous_slug = post.slug
  post.title, post.body, post.link = title, body, link
  save_post(post, previous_slug)
end
# Removes a post's body file and then its entry in the month's index.
# Nothing is committed or pushed here.
def delete_post(year, month, slug)
  month_dir = post_path(year, month)
  delete_post_body(month_dir, slug)
  delete_post_index(month_dir, slug)
end
# Runs the "publish" make target when +production+ is truthy, and the
# "publish_beta" target otherwise.
def publish(production = false)
  make_target = if production
                  'publish'
                else
                  'publish_beta'
                end
  run("make #{make_target}")
end
private
# Delegates the title lookup for +url+ to the injected title finder.
def find_title(url)
  finder = @title_finder
  finder.find_title(url)
end

# Joins the given components onto the blog's root path.
def path_for(*components)
  segments = [@path] + components
  File.join(*segments)
end

# Joins the given components onto the blog's public/posts directory.
def post_path(*components)
  path_for(*(['public/posts'] + components))
end
# Writes a post, commits it and pushes the commit.
#
# The working copy is first reset to origin/master. If writing, committing or
# pushing raises, the working copy is reset again and the failure is
# re-raised as PostSaveError, carrying the original exception.
#
# Returns the post on success.
def save_post(post, old_slug = nil)
  git_fetch
  git_reset_hard('origin/master')

  begin
    changed_dir = write_post(post, old_slug)
    git_commit(post.title, changed_dir)
    git_push
  rescue => error
    git_reset_hard
    raise PostSaveError.new('failed to save post', error)
  else
    post
  end
end
# Writes the post's body file and index entry into the directory for the
# post's year and month, removing the files of +old_slug+ first when given.
#
# If the index cannot be written the freshly written body is removed again
# and the error is re-raised.
#
# Returns the directory the post was written to.
def write_post(post, old_slug = nil)
  target_dir = post_path(post.time.year.to_s, post.padded_month)
  ensure_post_dir_exists(target_dir)

  if old_slug
    delete_post_body(target_dir, old_slug)
    delete_post_index(target_dir, old_slug)
  end

  write_post_body(target_dir, post.slug, post.body)
  begin
    write_post_index(target_dir, post.slug, post.persistent_fields)
  rescue
    delete_post_body(target_dir, post.slug)
    raise
  end

  target_dir
end
# Writes +body+ to "<slug>.md" inside +dir+.
def write_post_body(dir, slug, body)
  write_file(File.join(dir, "#{slug}.md"), body)
end

# Deletes "<slug>.md" inside +dir+.
def delete_post_body(dir, slug)
  delete_file(File.join(dir, "#{slug}.md"))
end
# Stores +fields+ under +slug+ in the index of +dir+ and writes the index
# back.
def write_post_index(dir, slug, fields)
  index = read_post_data(dir).tap { |data| data[slug] = fields }
  write_post_data(dir, index)
end

# Removes the entry for +slug+ from the index of +dir+ and writes the index
# back.
def delete_post_index(dir, slug)
  index = read_post_data(dir).tap { |data| data.delete(slug) }
  write_post_data(dir, index)
end
# Makes sure the monthly directory +dir+ and its parent (yearly) directory
# each contain an index.ejs, copying the ones from 2006/02 and 2006 when
# they are missing.
def ensure_post_dir_exists(dir)
  # [index to create, template to copy it from], both relative to dir
  index_templates = [
    ['index.ejs', '../../2006/02/index.ejs'],
    ['../index.ejs', '../../2006/index.ejs'],
  ]

  index_templates.each do |index_name, template_name|
    index_filename = File.join(dir, index_name)
    next if File.exist?(index_filename)

    cp(File.join(dir, template_name), index_filename)
  end

  nil
end
# Parses and returns the contents of _data.json in +dir+, or an empty Hash
# when that file does not exist.
def read_post_data(dir)
  index_filename = File.join(dir, '_data.json')
  return {} unless File.exist?(index_filename)

  JSON.parse(File.read(index_filename))
end

# Writes +data+ as pretty-printed JSON to _data.json in +dir+.
def write_post_data(dir, data)
  write_file(File.join(dir, '_data.json'), JSON.pretty_generate(data))
end
# Creates +dir+ (including missing parents) unless it is already a
# directory. In dry-run mode the equivalent command is printed instead.
def ensure_dir_exists(dir)
  return if File.directory?(dir)

  if @dry_run
    puts ">>> mkdir -p '#{dir}'"
  else
    FileUtils.mkdir_p(dir)
  end
end
# Copies +source+ to +destination+, creating the destination's directory
# first. An existing destination is only overwritten when +clobber+ is
# truthy. In dry-run mode the copy is printed instead of performed.
def cp(source, destination, clobber = false)
  ensure_dir_exists(File.dirname(destination))
  return if File.exist?(destination) && !clobber

  if @dry_run
    puts ">>> cp '#{source}' '#{destination}'"
  else
    FileUtils.cp(source, destination)
  end
end
# Writes +data+ to +filename+ using puts (so the file ends with a newline),
# creating the containing directory first. In dry-run mode the file name and
# contents are printed instead.
def write_file(filename, data)
  ensure_dir_exists(File.dirname(filename))

  unless @dry_run
    File.open(filename, 'w') { |file| file.puts(data) }
    return
  end

  puts ">>> write file '#{filename}', contents:"
  puts data
end
# Deletes +filename+ when it exists; a missing file is not an error.
# In dry-run mode the deletion is printed instead of performed.
def delete_file(filename)
  return unless File.exist?(filename)

  if @dry_run
    puts ">>> unlink '#{filename}'"
  else
    File.delete(filename)
  end
end
# Escapes +s+ for use inside a double-quoted shell argument.
#
# Within double quotes the shell still interprets backslash, double quote,
# dollar sign and backtick, so all four are backslash-escaped. Only the
# double quote used to be handled, which left command substitution
# ($(...) and `...`) live in values such as post titles.
def quote(s)
  s.gsub(/[\\"$`]/) { |char| "\\#{char}" }
end
# Runs +cmd+ in a shell from the blog's root directory and returns its
# combined stdout and stderr. In dry-run mode the command line is printed
# instead and nil is returned.
def run(cmd)
  return puts(">>> cd '#{@path}' && #{cmd}") if @dry_run

  %x[cd '#{@path}' && #{cmd} 2>&1]
end
# Stages the given files (git add -A) and commits them with the message
# "linked '<title>'". Files and title are passed through quote and wrapped
# in double quotes.
def git_commit(title, *files)
  file_args = files.map { |file| %("#{quote(file)}") }.join(' ')
  commit_message = "linked '#{quote(title)}'"
  run(%(git add -A #{file_args} && git commit -m "#{commit_message}"))
end
# Runs "git fetch" in the blog's working copy.
def git_fetch
  run('git fetch')
end

# Runs "git reset --hard", against +ref+ (single-quoted) when one is given.
def git_reset_hard(ref = nil)
  target = if ref
             "'#{ref}'"
           else
             ''
           end
  run("git reset --hard #{target}")
end

# Runs "git push", with -f when +force+ is truthy.
def git_push(force = false)
  flags = if force
            '-f'
          else
            ''
          end
  run("git push #{flags}")
end
end

View file

@ -0,0 +1,318 @@
require 'json'
require_relative './helpers'
require_relative '../harp_blog'
# Working copy the specs operate on, and the local repository whose HEAD is
# compared with it after a push. Both live next to this spec file.
TEST_BLOG_PATH = File.expand_path('test-blog', File.dirname(__FILE__))
TEST_BLOG_ORIGIN_PATH = File.expand_path('test-blog-origin.git', File.dirname(__FILE__))

# Make the shared helpers available inside every example.
RSpec.configure { |config| config.include(Helpers) }
# Unit specs for HarpBlog::Post. None of these touch the file system.
RSpec.describe HarpBlog::Post do
  # Persistent fields: author, title, date, timestamp, link, url, tags
  # Transient fields: time, slug, body

  before :all do
    @default_fields = {
      title: 'samhuri.net',
      link: 'http://samhuri.net',
      body: 'this site is sick',
    }
    @default_slug = 'samhuri-net'
  end

  describe '#new' do
    it "takes a Hash of fields" do
      fields = @default_fields
      post = HarpBlog::Post.new(fields)
      expect(post.title).to eq(fields[:title])
      expect(post.link).to eq(fields[:link])
      expect(post.body).to eq(fields[:body])
    end

    it "accepts no parameters" do
      post = HarpBlog::Post.new
      expect(post).to be_truthy
    end

    it "ignores unknown fields" do
      post = HarpBlog::Post.new(what: 'is this')
      expect(post).to be_truthy
    end
  end

  describe '#persistent_fields' do
    it "contains all expected fields" do
      all_keys = HarpBlog::Post::PERSISTENT_FIELDS.sort
      post = HarpBlog::Post.new
      # actual value inside expect(), expected value in the matcher
      expect(post.persistent_fields.keys.sort).to eq(all_keys)
    end
  end

  describe '#fields' do
    it "contains all expected fields" do
      all_keys = HarpBlog::Post::FIELDS.sort
      post = HarpBlog::Post.new
      expect(post.fields.keys.sort).to eq(all_keys)
    end
  end

  describe '#link?' do
    it "returns true for link posts" do
      post = HarpBlog::Post.new(link: @default_fields[:link])
      expect(post.link?).to eq(true)
    end

    it "returns false for article posts" do
      post = HarpBlog::Post.new
      expect(post.link?).to eq(false)
    end
  end

  describe '#time' do
    it "should be derived from the timestamp if necessary" do
      timestamp = Time.now.to_i
      post = HarpBlog::Post.new(timestamp: timestamp)
      expect(post.time.to_i).to eq(timestamp)
    end
  end

  describe '#timestamp' do
    it "should be derived from the time if necessary" do
      time = Time.now - 42
      post = HarpBlog::Post.new(time: time)
      expect(post.timestamp).to eq(time.to_i)
    end
  end

  describe '#url' do
    it "should be derived from the time and slug if necessary" do
      post = HarpBlog::Post.new(@default_fields)
      year = post.time.year.to_s
      # URLs use zero-padded months ("02"). Padding with a space here made
      # this example fail in every month before October.
      padded_month = format('%02d', post.time.month)
      expect(post.url).to eq("/posts/#{year}/#{padded_month}/#{@default_slug}")
    end
  end

  describe '#slug' do
    it "should be derived from the title if necessary" do
      post = HarpBlog::Post.new(@default_fields)
      expect(post.slug).to eq(@default_slug)
    end

    it "should strip apostrophes" do
      post = HarpBlog::Post.new(title: "sjs's post")
      expect(post.slug).to eq('sjss-post')
    end

    it "should replace most non-word characters with dashes" do
      post = HarpBlog::Post.new(title: 'foo/bår!baz_quüx42')
      expect(post.slug).to eq('foo-bår-baz_quüx42')
    end

    it "should strip leading and trailing dashes" do
      post = HarpBlog::Post.new(title: '!foo?bar!')
      expect(post.slug).to eq('foo-bar')
    end

    it "should collapse runs of dashes" do
      post = HarpBlog::Post.new(title: 'foo???bar')
      expect(post.slug).to eq('foo-bar')
    end
  end

  describe '#pad' do
    it "should have a leading zero for integers 0 < n < 10" do
      post = HarpBlog::Post.new
      expect(post.pad(1)).to eq('01')
      expect(post.pad(9)).to eq('09')
    end

    it "should not have a leading zero for integers n >= 10" do
      post = HarpBlog::Post.new
      expect(post.pad(10)).to eq('10')
      expect(post.pad(12)).to eq('12')
    end
  end
end
# Integration specs for HarpBlog. They run against the working copy at
# TEST_BLOG_PATH with dry_run disabled, so files are really written and
# commits are really pushed to that working copy's remote.
RSpec.describe HarpBlog do
  # Remember where the working copy started so each example can be undone.
  before :each do
    @test_blog_ref = git_sha(TEST_BLOG_PATH)
    dry_run = false
    @blog = HarpBlog.new(TEST_BLOG_PATH, dry_run)
  end

  # Rewind the working copy and force-push so the remote is rewound as well.
  after :each do
    git_reset_hard(TEST_BLOG_PATH, @test_blog_ref)
    force = true
    @blog.send(:git_push, force)
  end

  # Builds a blog whose title finder always answers with +title+, without
  # defining a global class from inside an example.
  def blog_with_stubbed_title(title)
    finder = Object.new
    finder.define_singleton_method(:find_title) { |_url| title }
    dry_run = false
    HarpBlog.new(TEST_BLOG_PATH, dry_run, finder)
  end

  describe '#new' do
    it "should optionally accept dry_run" do
      expect(@blog).to be_truthy

      blog = HarpBlog.new(TEST_BLOG_PATH)
      expect(blog).to be_truthy
    end
  end

  describe '#years' do
    it "should return all of the years with posts" do
      # yup, if I don't blog for an entire year that's a bug!
      years = (2006..Date.today.year).to_a.map(&:to_s)
      expect(@blog.years).to eq(years)
    end
  end

  describe '#posts_for_month' do
    it "should return the correct number of posts" do
      expect(@blog.posts_for_month('2006', '02').length).to eq(12)
    end

    it "should sort the posts by publish time" do
      timestamps = @blog.posts_for_month('2006', '02').map(&:timestamp)
      expect(increasing?(timestamps)).to be_truthy
    end
  end

  describe '#posts_for_year' do
    it "should return the correct number of posts" do
      expect(@blog.posts_for_year('2006').length).to eq(31)
    end

    it "should sort the posts by publish time" do
      timestamps = @blog.posts_for_year('2006').map(&:timestamp)
      expect(increasing?(timestamps)).to be_truthy
    end
  end

  describe '#create_post' do
    it "should create a link post when a link is given" do
      title = 'test post'
      body = 'check this out'
      link = 'http://samhuri.net'
      post = @blog.create_post(title, body, link)
      expect(post).to be_truthy
      expect(post.link?).to be_truthy
      expect(post.title).to eq(title)
      expect(post.body).to eq(body)
      expect(post.link).to eq(link)
      expect(post.time.to_date).to eq(Date.today)
    end

    it "should create an article post when no link is given" do
      title = 'test post'
      body = 'check this out'
      post = @blog.create_post(title, body, nil)
      expect(post).to be_truthy
      expect(post.link?).to be_falsy
      expect(post.title).to eq(title)
      expect(post.body).to eq(body)
      expect(post.link).to eq(nil)
      expect(post.time.to_date).to eq(Date.today)
    end

    it "should create a post that can be fetched immediately" do
      title = 'fetch now'
      body = 'blah blah blah'
      post = @blog.create_post(title, body, nil)
      expect(post).to be_truthy

      today = Date.today
      year = today.year.to_s
      month = post.pad(today.month)
      fetched_post = @blog.get_post(year, month, post.slug)
      expect(post.url).to eq(fetched_post.url)
    end

    # The nil and blank cases are separate examples: both produce the same
    # slug, so creating them back to back in one example would make the
    # second call find the first post.
    it "should fetch titles if the title is nil" do
      blog = blog_with_stubbed_title('fancy title')
      post = blog.create_post(nil, nil, 'http://samhuri.net')
      expect(post.title).to eq('fancy title')
    end

    it "should fetch titles if the title is blank" do
      blog = blog_with_stubbed_title('fancy title')
      post = blog.create_post(" \t\n", nil, 'http://samhuri.net')
      expect(post.title).to eq('fancy title')
    end

    it "should push the new post to the origin repo" do
      title = 'fetch now'
      body = 'blah blah blah'
      post = @blog.create_post(title, body, nil)
      local_sha = git_sha(TEST_BLOG_PATH)
      origin_sha = git_sha(TEST_BLOG_ORIGIN_PATH)
      expect(origin_sha).to eq(local_sha)
    end
  end

  describe '#get_post' do
    it "should return complete posts" do
      first_post_path = File.join(TEST_BLOG_PATH, 'public/posts/2006/02/first-post.md')
      post = @blog.get_post('2006', '02', 'first-post')
      expect(post).to be_truthy
      expect(post.author).to eq('Sami Samhuri')
      expect(post.title).to eq('First Post!')
      expect(post.slug).to eq('first-post')
      expect(post.timestamp).to eq(1139368860)
      expect(post.date).to eq('8th February, 2006')
      expect(post.url).to eq('/posts/2006/02/first-post')
      expect(post.link).to eq(nil)
      expect(post.link?).to eq(false)
      expect(post.tags).to eq(['life'])
      expect(post.body).to eq(File.read(first_post_path))
    end

    it "should return nil if the post does not exist" do
      post = @blog.get_post('2005', '01', 'anything')
      expect(post).to be(nil)
    end
  end

  describe '#update_post' do
    it "should immediately reflect changes when fetched" do
      post = @blog.get_post('2006', '02', 'first-post')
      title = 'new title'
      body = "new body\n"
      link = 'new link'
      @blog.update_post(post, title, body, link)

      # new slug, new data
      post = @blog.get_post('2006', '02', 'new-title')
      expect(post.title).to eq(title)
      expect(post.body).to eq(body)
      expect(post.link).to eq(link)

      # old post is long gone
      post = @blog.get_post('2006', '02', 'first-post')
      expect(post).to eq(nil)
    end
  end

  describe '#delete_post' do
    it "should delete existing posts" do
      post = @blog.get_post('2006', '02', 'first-post')
      expect(post).to be_truthy

      @blog.delete_post('2006', '02', 'first-post')

      post = @blog.get_post('2006', '02', 'first-post')
      expect(post).to eq(nil)
    end

    it "should do nothing for non-existent posts" do
      post = @blog.get_post('2006', '02', 'first-post')
      expect(post).to be_truthy

      @blog.delete_post('2006', '02', 'first-post')

      # Deleting a post that is already gone must be a harmless no-op.
      expect { @blog.delete_post('2006', '02', 'first-post') }.not_to raise_error
      expect(@blog.get_post('2006', '02', 'first-post')).to eq(nil)
    end
  end
end

39
server/spec/helpers.rb Normal file
View file

@ -0,0 +1,39 @@
# Helpers mixed into the specs: ordering predicates and a few git utilities
# for inspecting and resetting repositories.
module Helpers
  # True when no element is greater than its successor (equal neighbours are
  # allowed). Lists with fewer than two elements are trivially increasing;
  # incomparable neighbours make the result false.
  def increasing?(list)
    comparisons(list).all? { |x| x && x <= 0 }
  end

  # True when no element is smaller than its successor (equal neighbours are
  # allowed).
  def decreasing?(list)
    comparisons(list).all? { |x| x && x >= 0 }
  end

  # Returns the <=> result for each pair of adjacent elements.
  #
  # Every element used to be compared against the first one only, so a list
  # such as [1, 3, 2] was reported as increasing.
  def comparisons(list)
    list.each_cons(2).map { |left, right| left <=> right }
  end

  # A directory without a .git entry is treated as a bare repository.
  def git_bare?(dir)
    !File.exist?(File.join(dir, '.git'))
  end

  # Returns the SHA that HEAD points at in the repository at +dir+.
  #
  # `git rev-parse HEAD` works for bare and non-bare repositories alike;
  # reading the ref file named in HEAD by hand fails once refs are packed.
  def git_sha(dir)
    `cd '#{dir}' && git rev-parse HEAD`.strip
  end

  # Hard-resets the working copy at +dir+, to +ref+ when one is given.
  # Raises for bare repositories, which have no working copy to reset.
  def git_reset_hard(dir, ref = nil)
    if git_bare?(dir)
      raise 'git_reset_hard does not support bare repos'
    else
      args = ref ? "'#{ref}'" : ''
      `cd '#{dir}' && git reset --hard #{args}`
    end
  end
end

View file

@ -0,0 +1,16 @@
require 'htmlentities'
require 'open-uri'
# Looks up the <title> of a web page.
class WebTitleFinder
  # Matches the contents of the first <title> element regardless of case,
  # attributes, or line breaks inside the element.
  TITLE_PATTERN = %r{<title[^>]*>(.*?)</title\s*>}mi

  # Fetches +url+ and returns the page's title with HTML entities decoded.
  #
  # This is best effort: nil is returned when the page cannot be fetched,
  # has no <title>, or anything else goes wrong along the way.
  def find_title(url)
    html_title = extract_title(fetch(url))
    html_title && HTMLEntities.new.decode(html_title)
  rescue StandardError
    nil
  end

  private

  # Downloads the document at +url+ through open-uri.
  #
  # The URL is parsed first instead of being handed to Kernel#open: that
  # treats strings starting with "|" as commands to run and opens local
  # paths on older Rubies, and no longer fetches URLs at all on Ruby 3.
  def fetch(url)
    URI.parse(url).read
  end

  # Returns the text of the first <title> element in +html+, or nil when
  # there is none. Only the element's own text is returned, even when other
  # markup shares its line; line breaks inside the title become one space.
  def extract_title(html)
    raw_title = html[TITLE_PATTERN, 1]
    raw_title && raw_title.strip.gsub(/\s*[\r\n]+\s*/, ' ')
  end
end