diff --git a/.gitignore b/.gitignore index 67f51e8..d0143ca 100644 --- a/.gitignore +++ b/.gitignore @@ -3,4 +3,4 @@ node_modules public/feed.xml www server/auth.json - +server/spec/test-blog* diff --git a/.tm_properties b/.tm_properties index 5c5a5ce..53ada62 100644 --- a/.tm_properties +++ b/.tm_properties @@ -1 +1,2 @@ -exclude = "{$exclude,www,node_modules,tweets,wayback}" +exclude = "{$exclude,www,node_modules,tweets,wayback,test-blog*}" +include = "{$include,.gitignore}" diff --git a/Gemfile b/Gemfile index 097a39a..e278773 100644 --- a/Gemfile +++ b/Gemfile @@ -1,8 +1,7 @@ source 'https://rubygems.org' gem 'builder' -gem 'json' -gem 'mustache' +gem 'htmlentities' gem 'rdiscount' -gem 'nokogiri' -gem 'css_parser' +gem 'rspec' +gem 'guard-rspec' diff --git a/Gemfile.lock b/Gemfile.lock index cd45153..a1ba32d 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -1,24 +1,61 @@ GEM remote: https://rubygems.org/ specs: - addressable (2.3.5) - builder (3.0.0) - css_parser (1.3.5) - addressable - json (1.6.1) - mini_portile (0.5.2) - mustache (0.99.4) - nokogiri (1.6.1) - mini_portile (~> 0.5.0) + builder (3.2.2) + celluloid (0.16.0) + timers (~> 4.0.0) + coderay (1.1.0) + diff-lcs (1.2.5) + ffi (1.9.3) + formatador (0.2.5) + guard (2.6.1) + formatador (>= 0.2.4) + listen (~> 2.7) + lumberjack (~> 1.0) + pry (>= 0.9.12) + thor (>= 0.18.1) + guard-rspec (4.3.1) + guard (~> 2.1) + rspec (>= 2.14, < 4.0) + hitimes (1.2.2) + htmlentities (4.3.2) + listen (2.7.9) + celluloid (>= 0.15.2) + rb-fsevent (>= 0.9.3) + rb-inotify (>= 0.9) + lumberjack (1.0.9) + method_source (0.8.2) + pry (0.10.1) + coderay (~> 1.1.0) + method_source (~> 0.8.1) + slop (~> 3.4) + rb-fsevent (0.9.4) + rb-inotify (0.9.5) + ffi (>= 0.5.0) rdiscount (1.6.8) + rspec (3.0.0) + rspec-core (~> 3.0.0) + rspec-expectations (~> 3.0.0) + rspec-mocks (~> 3.0.0) + rspec-core (3.0.4) + rspec-support (~> 3.0.0) + rspec-expectations (3.0.4) + diff-lcs (>= 1.2.0, < 2.0) + rspec-support (~> 3.0.0) + rspec-mocks (3.0.4) 
+ rspec-support (~> 3.0.0) + rspec-support (3.0.4) + slop (3.6.0) + thor (0.19.1) + timers (4.0.1) + hitimes PLATFORMS ruby DEPENDENCIES builder - css_parser - json - mustache - nokogiri + guard-rspec + htmlentities rdiscount + rspec diff --git a/Makefile b/Makefile index 9d83efc..0122b63 100644 --- a/Makefile +++ b/Makefile @@ -12,4 +12,10 @@ publish_beta: compile @echo ./bin/publish.sh --beta --delete -.PHONY: compile publish publish_beta +test_blog: + ./bin/create-test-blog.sh server/spec/test-blog + +spec: + cd server && rspec -f documentation + +.PHONY: compile publish publish_beta test_blog spec diff --git a/bin/create-test-blog.sh b/bin/create-test-blog.sh new file mode 100755 index 0000000..2db42fd --- /dev/null +++ b/bin/create-test-blog.sh @@ -0,0 +1,16 @@ +#!/bin/zsh + +set -e # bail on errors + +BLOG_PATH="$1" +ORIGIN_BLOG_PATH="${BLOG_PATH}-origin.git" +if [[ -e "$BLOG_PATH" ]]; then + echo ">>> Refusing to clobber $BLOG_PATH" +else + if [[ ! -e "$ORIGIN_BLOG_PATH" ]]; then + echo ">>> Mirroring local origin..." + git clone --mirror git@github.com:samsonjs/samhuri.net.git "$ORIGIN_BLOG_PATH" + fi + echo ">>> Cloning test blog from local origin..." + git clone "$ORIGIN_BLOG_PATH" "$BLOG_PATH" +fi diff --git a/server/Guardfile b/server/Guardfile new file mode 100644 index 0000000..cd2e62f --- /dev/null +++ b/server/Guardfile @@ -0,0 +1,18 @@ +# A sample Guardfile +# More info at https://github.com/guard/guard#readme + +# Note: The cmd option is now required due to the increasing number of ways +# rspec may be run, below are examples of the most common uses. 
+# * bundler: 'bundle exec rspec' +# * bundler binstubs: 'bin/rspec' +# * spring: 'bin/rspec' (This will use spring if running and you have +# installed the spring binstubs per the docs) +# * zeus: 'zeus rspec' (requires the server to be started separately) +# * 'just' rspec: 'rspec' +guard :rspec, cmd: 'bundle exec rspec' do + watch('auth.json') { 'spec/server_spec.rb' } + watch(%r{^(.+)\.rb$}) { |m| "spec/#{m[1]}_spec.rb" } + + watch('spec/helpers.rb') { 'spec' } + watch(%r{^spec/.+_spec\.rb$}) +end diff --git a/server/harp_blog.rb b/server/harp_blog.rb new file mode 100644 index 0000000..6517674 --- /dev/null +++ b/server/harp_blog.rb @@ -0,0 +1,369 @@ +require 'fileutils' +require 'json' +require './web_title_finder' + +class HarpBlog + + class HarpBlogError < RuntimeError ; end + class InvalidDataError < HarpBlogError ; end + class PostExistsError < HarpBlogError ; end + + class PostSaveError < HarpBlogError + attr_reader :original_error + def initialize(message, original_error) + super(message) + @original_error = original_error + end + end + + class Post + PERSISTENT_FIELDS = %w[author title date timestamp link url tags].map(&:to_sym) + TRANSIENT_FIELDS = %w[time slug body].map(&:to_sym) + FIELDS = PERSISTENT_FIELDS + TRANSIENT_FIELDS + attr_accessor *FIELDS + + def initialize(fields = nil) + if fields + FIELDS.each do |k| + if v = fields[k.to_s] || fields[k.to_sym] + instance_variable_set("@#{k}", v) + end + end + end + end + + def persistent_fields + PERSISTENT_FIELDS.inject({}) do |h, k| + h[k] = send(k) + h + end + end + + def fields + FIELDS.inject({}) do |h, k| + h[k] = send(k) + h + end + end + + def link? + !!link + end + + def title=(title) + @slug = nil + @title = title + end + + def author + @author ||= 'Sami Samhuri' + end + + def time + @time ||= @timestamp ? 
Time.at(@timestamp) : Time.now + end + + def timestamp + @timestamp ||= time.to_i + end + + def url + @url ||= "/posts/#{time.year}/#{padded_month}/#{slug}" + end + + def slug + # TODO: be intelligent about unicode ... \p{Word} might help. negated char class with it? + if title + @slug ||= title.downcase. + gsub(/'/, ''). + gsub(/[^[:alpha:]\d_]/, '-'). + gsub(/^-+|-+$/, ''). + gsub(/-+/, '-') + end + end + + def date + @date ||= time.strftime('%B %d, %Y') + end + + def tags + @tags ||= [] + end + + def padded_month + pad(time.month) + end + + def pad(n) + n.to_i < 10 ? "0#{n}" : "#{n}" + end + end # Post + + + def initialize(path, dry_run = true, title_finder = nil) + @path = path + @dry_run = dry_run + @title_finder = title_finder || WebTitleFinder.new + end + + def years + Dir[post_path('20*')].map { |x| File.basename(x) }.sort + end + + def posts_for_year(year) + posts = [] + 1.upto(12) do |n| + month = n < 10 ? "0#{n}" : "#{n}" + posts += posts_for_month(year, month) + end + posts + end + + def posts_for_month(year, month) + post_dir = post_path(year, month) + post_data = read_post_data(post_dir) + post_data.values.sort_by {|p| p['timestamp'] }.map {|p| Post.new(p) } + end + + def get_post(year, month, slug) + post_dir = post_path(year, month) + post_filename = File.join(post_dir, "#{slug}.md") + fields = read_post_data(post_dir)[slug] # look up the metadata before testing for the body file, otherwise the "missing post body" branch below can never run + if File.exist?(post_filename) && fields + fields[:body] = File.read(post_filename) + Post.new(fields) + elsif fields + message = "missing post body for #{year}/#{month}/#{slug}: #{post_filename}" + $stderr.puts "[HarpBlog#get_post] #{message}" + raise InvalidDataError.new(message) + elsif File.exist?(post_filename) + message = "missing metadata for #{year}/#{month}/#{slug}: #{post_dir}/_data.json" + $stderr.puts "[HarpBlog#get_post] #{message}" + raise InvalidDataError.new(message) + end + end + + def create_post(title, body, link) + if !title || title.strip.length == 0 + title = find_title(link) + end + unless 
title + raise "cannot find title for #{link}" + end + fields = { + title: title, + link: link, + body: body, + } + post = Post.new(fields) + year, month, slug = post.time.year, post.padded_month, post.slug + + begin + existing_post = get_post(year.to_s, month, slug) + rescue InvalidDataError => e + $stderr.puts "[HarpBlog#create_post] deleting post with invalid data: #{e.message}" + delete_post(year.to_s, month, slug) + existing_post = nil + end + + if existing_post + raise PostExistsError.new("post exists: #{year}/#{month}/#{slug}") + else + save_post(post) + end + end + + def update_post(post, title, body, link) + old_slug = post.slug + post.title = title + post.body = body + post.link = link + save_post(post, old_slug) + end + + def delete_post(year, month, slug) + post_dir = post_path(year, month) + delete_post_body(post_dir, slug) + delete_post_index(post_dir, slug) + end + + def publish(production = false) + target = production ? 'publish' : 'publish_beta' + run("make #{target}") + end + + + private + + def find_title(url) + @title_finder.find_title(url) + end + + def path_for(*components) + File.join(@path, *components) + end + + def post_path(*components) + path_for('public/posts', *components) + end + + def save_post(post, old_slug = nil) + git_fetch + git_reset_hard('origin/master') + + begin + post_dir = write_post(post, old_slug) + git_commit(post.title, post_dir) + git_push + post + + rescue => e + git_reset_hard + raise PostSaveError.new('failed to save post', e) + end + end + + def write_post(post, old_slug = nil) + post_dir = post_path(post.time.year.to_s, post.padded_month) + ensure_post_dir_exists(post_dir) + if old_slug + delete_post_body(post_dir, old_slug) + delete_post_index(post_dir, old_slug) + end + write_post_body(post_dir, post.slug, post.body) + begin + write_post_index(post_dir, post.slug, post.persistent_fields) + rescue => e + delete_post_body(post_dir, post.slug) + raise e + end + post_dir + end + + def write_post_body(dir, slug, 
body) + post_filename = File.join(dir, "#{slug}.md") + write_file(post_filename, body) + end + + def delete_post_body(dir, slug) + post_filename = File.join(dir, "#{slug}.md") + delete_file(post_filename) + end + + def write_post_index(dir, slug, fields) + post_data = read_post_data(dir) + post_data[slug] = fields + write_post_data(dir, post_data) + end + + def delete_post_index(dir, slug) + post_data = read_post_data(dir) + post_data.delete(slug) + write_post_data(dir, post_data) + end + + def ensure_post_dir_exists(dir) + monthly_index_filename = File.join(dir, 'index.ejs') + unless File.exist?(monthly_index_filename) + source = File.join(dir, '../../2006/02/index.ejs') + cp(source, monthly_index_filename) + end + + yearly_index_filename = File.join(dir, '../index.ejs') + unless File.exist?(yearly_index_filename) + source = File.join(dir, '../../2006/index.ejs') + cp(source, yearly_index_filename) + end + end + + def read_post_data(dir) + post_data_filename = File.join(dir, '_data.json') + if File.exist?(post_data_filename) + JSON.parse(File.read(post_data_filename)) + else + {} + end + end + + def write_post_data(dir, data) + post_data_filename = File.join(dir, '_data.json') + json = JSON.pretty_generate(data) + write_file(post_data_filename, json) + end + + def ensure_dir_exists(dir) + unless File.directory?(dir) + if @dry_run + puts ">>> mkdir -p '#{dir}'" + else + FileUtils.mkdir_p(dir) + end + end + end + + def cp(source, destination, clobber = false) + ensure_dir_exists(File.dirname(destination)) + if !File.exist?(destination) || clobber + if @dry_run + puts ">>> cp '#{source}' '#{destination}'" + else + FileUtils.cp(source, destination) + end + end + end + + def write_file(filename, data) + ensure_dir_exists(File.dirname(filename)) + if @dry_run + puts ">>> write file '#{filename}', contents:" + puts data + else + File.open(filename, 'w') do |f| + f.puts(data) + end + end + end + + def delete_file(filename) + if File.exist?(filename) + if @dry_run + puts 
">>> unlink '#{filename}'" + else + File.unlink(filename) + end + end + end + + def quote(s) + s.gsub(/[\\"$`]/) { |c| "\\#{c}" } # escape every character the shell still interprets inside double quotes (titles may come from remote pages) + end + + def run(cmd) + if @dry_run + puts ">>> cd '#{@path}' && #{cmd}" + else + `cd '#{@path}' && #{cmd} 2>&1` + end + end + + def git_commit(title, *files) + quoted_files = files.map { |f| "\"#{quote(f)}\"" } + message = "linked '#{quote(title)}'" + run("git add -A #{quoted_files.join(' ')} && git commit -m \"#{message}\"") + end + + def git_fetch + run('git fetch') + end + + def git_reset_hard(ref = nil) + args = ref ? "'#{ref}'" : '' + run("git reset --hard #{args}") + end + + def git_push(force = false) + args = force ? '-f' : '' + run("git push #{args}") + end + +end diff --git a/server/spec/harp_blog_spec.rb b/server/spec/harp_blog_spec.rb new file mode 100644 index 0000000..619ffac --- /dev/null +++ b/server/spec/harp_blog_spec.rb @@ -0,0 +1,318 @@ +require 'json' +require_relative './helpers' +require_relative '../harp_blog' + +TEST_BLOG_PATH = File.expand_path('../test-blog', __FILE__) +TEST_BLOG_ORIGIN_PATH = File.expand_path('../test-blog-origin.git', __FILE__) + +RSpec.configure do |c| + c.include Helpers +end + +RSpec.describe HarpBlog::Post do + + # Persistent fields: author, title, date, timestamp, link, url, tags + # Transient fields: time, slug, body + + before :all do + @default_fields = { + title: 'samhuri.net', + link: 'http://samhuri.net', + body: 'this site is sick', + } + @default_slug = 'samhuri-net' + end + + describe '#new' do + it "takes a Hash of fields" do + fields = @default_fields + post = HarpBlog::Post.new(fields) + expect(post.title).to eq(fields[:title]) + expect(post.link).to eq(fields[:link]) + expect(post.body).to eq(fields[:body]) + end + + it "accepts no parameters" do + post = HarpBlog::Post.new + expect(post).to be_truthy + end + + it "ignores unknown fields" do + post = HarpBlog::Post.new(what: 'is this') + expect(post).to be_truthy + end + end + + describe '#persistent_fields' do + it "contains all expected 
fields" do + all_keys = HarpBlog::Post::PERSISTENT_FIELDS.sort + post = HarpBlog::Post.new + expect(all_keys).to eq(post.persistent_fields.keys.sort) + end + end + + describe '#fields' do + it "contains all expected fields" do + all_keys = HarpBlog::Post::FIELDS.sort + post = HarpBlog::Post.new + expect(all_keys).to eq(post.fields.keys.sort) + end + end + + describe '#link?' do + it "returns true for link posts" do + post = HarpBlog::Post.new(link: @default_fields[:link]) + expect(post.link?).to eq(true) + end + + it "returns false for article posts" do + post = HarpBlog::Post.new + expect(post.link?).to eq(false) + end + end + + describe '#time' do + it "should be derived from the timestamp if necessary" do + timestamp = Time.now.to_i + post = HarpBlog::Post.new(timestamp: timestamp) + expect(post.time.to_i).to eq(timestamp) + end + end + + describe '#timestamp' do + it "should be derived from the time if necessary" do + time = Time.now - 42 + post = HarpBlog::Post.new(time: time) + expect(post.timestamp).to eq(time.to_i) + end + end + + describe '#url' do + it "should be derived from the time and slug if necessary" do + post = HarpBlog::Post.new(@default_fields) + year = post.time.year.to_s + month = post.time.month + padded_month = month < 10 ? 
"0#{month}" : "#{month}" # zero-padded, matching Post#pad + expect(post.url).to eq("/posts/#{year}/#{padded_month}/#{@default_slug}") + end + end + + describe '#slug' do + it "should be derived from the title if necessary" do + post = HarpBlog::Post.new(@default_fields) + expect(post.slug).to eq(@default_slug) + end + + it "should strip apostrophes" do + post = HarpBlog::Post.new(title: "sjs's post") + expect(post.slug).to eq('sjss-post') + end + + it "should replace most non-word characters with dashes" do + post = HarpBlog::Post.new(title: 'foo/bår!baz_quüx42') + expect(post.slug).to eq('foo-bår-baz_quüx42') + end + + it "should strip leading and trailing dashes" do + post = HarpBlog::Post.new(title: '!foo?bar!') + expect(post.slug).to eq('foo-bar') + end + + it "should collapse runs of dashes" do + post = HarpBlog::Post.new(title: 'foo???bar') + expect(post.slug).to eq('foo-bar') + end + end + + describe '#pad' do + it "should have a leading zero for integers 0 < n < 10" do + post = HarpBlog::Post.new + expect(post.pad(1)).to eq('01') + expect(post.pad(9)).to eq('09') + end + + it "should not have a leading zero for integers n >= 10" do + post = HarpBlog::Post.new + expect(post.pad(10)).to eq('10') + expect(post.pad(12)).to eq('12') + end + end +end + +RSpec.describe HarpBlog do + before :each do + @test_blog_ref = git_sha(TEST_BLOG_PATH) + dry_run = false + @blog = HarpBlog.new(TEST_BLOG_PATH, dry_run) + end + + after :each do + git_reset_hard(TEST_BLOG_PATH, @test_blog_ref) + force = true + @blog.send(:git_push, force) + end + + describe '#new' do + it "should optionally accept dry_run" do + expect(@blog).to be_truthy + + blog = HarpBlog.new(TEST_BLOG_PATH) + expect(blog).to be_truthy + end + end + + describe '#years' do + it "should return all of the years with posts" do + # yup, if I don't blog for an entire year that's a bug! 
+ years = (2006..Date.today.year).to_a.map(&:to_s) + expect(@blog.years).to eq(years) + end + end + + describe '#posts_for_month' do + it "should return the correct number of posts" do + expect(@blog.posts_for_month('2006', '02').length).to eq(12) + end + + it "should sort the posts by publish time" do + timestamps = @blog.posts_for_month('2006', '02').map(&:timestamp) + expect(increasing?(timestamps)).to be_truthy + end + end + + describe '#posts_for_year' do + it "should return the correct number of posts" do + expect(@blog.posts_for_year('2006').length).to eq(31) + end + + it "should sort the posts by publish time" do + timestamps = @blog.posts_for_year('2006').map(&:timestamp) + expect(increasing?(timestamps)).to be_truthy + end + end + + describe '#create_post' do + it "should create a link post when a link is given" do + title = 'test post' + body = 'check this out' + link = 'http://samhuri.net' + post = @blog.create_post(title, body, link) + expect(post).to be_truthy + expect(post.link?).to be_truthy + expect(post.title).to eq(title) + expect(post.body).to eq(body) + expect(post.link).to eq(link) + expect(post.time.to_date).to eq(Date.today) + end + + it "should create an article post when no link is given" do + title = 'test post' + body = 'check this out' + post = @blog.create_post(title, body, nil) + expect(post).to be_truthy + expect(post.link?).to be_falsy + expect(post.title).to eq(title) + expect(post.body).to eq(body) + expect(post.link).to eq(nil) + expect(post.time.to_date).to eq(Date.today) + end + + it "should create a post that can be fetched immediately" do + title = 'fetch now' + body = 'blah blah blah' + post = @blog.create_post(title, body, nil) + expect(post).to be_truthy + + today = Date.today + year = today.year.to_s + month = post.pad(today.month) + fetched_post = @blog.get_post(year, month, post.slug) + expect(post.url).to eq(fetched_post.url) + end + + it "should fetch titles if necessary" do + class TitleFinder + def find_title(url) 
'fancy title' end + end + dry_run = false + blog = HarpBlog.new(TEST_BLOG_PATH, dry_run, TitleFinder.new) + post = blog.create_post(nil, nil, 'http://samhuri.net') + expect(post.title).to eq('fancy title') + post = blog.create_post(" \t\n", nil, 'http://samhuri.net') + expect(post.title).to eq('fancy title') + end + + it "should push the new post to the origin repo" do + title = 'fetch now' + body = 'blah blah blah' + post = @blog.create_post(title, body, nil) + local_sha = git_sha(TEST_BLOG_PATH) + origin_sha = git_sha(TEST_BLOG_ORIGIN_PATH) + expect(origin_sha).to eq(local_sha) + end + end + + describe '#get_post' do + it "should return complete posts" do + first_post_path = File.join(TEST_BLOG_PATH, 'public/posts/2006/02/first-post.md') + post = @blog.get_post('2006', '02', 'first-post') + expect(post).to be_truthy + expect(post.author).to eq('Sami Samhuri') + expect(post.title).to eq('First Post!') + expect(post.slug).to eq('first-post') + expect(post.timestamp).to eq(1139368860) + expect(post.date).to eq('8th February, 2006') + expect(post.url).to eq('/posts/2006/02/first-post') + expect(post.link).to eq(nil) + expect(post.link?).to eq(false) + expect(post.tags).to eq(['life']) + expect(post.body).to eq(File.read(first_post_path)) + end + + it "should return nil if the post does not exist" do + post = @blog.get_post('2005', '01', 'anything') + expect(post).to be(nil) + end + end + + describe '#update_post' do + it "should immediately reflect changes when fetched" do + post = @blog.get_post('2006', '02', 'first-post') + title = 'new title' + body = "new body\n" + link = 'new link' + @blog.update_post(post, title, body, link) + + # new slug, new data + post = @blog.get_post('2006', '02', 'new-title') + expect(post.title).to eq(title) + expect(post.body).to eq(body) + expect(post.link).to eq(link) + + # old post is long gone + post = @blog.get_post('2006', '02', 'first-post') + expect(post).to eq(nil) + end + end + + describe '#delete_post' do + it "should delete 
existing posts" do + post = @blog.get_post('2006', '02', 'first-post') + expect(post).to be_truthy + + @blog.delete_post('2006', '02', 'first-post') + + post = @blog.get_post('2006', '02', 'first-post') + expect(post).to eq(nil) + end + + it "should do nothing for non-existent posts" do + post = @blog.get_post('2006', '02', 'first-post') + expect(post).to be_truthy + + @blog.delete_post('2006', '02', 'first-post') + @blog.delete_post('2006', '02', 'first-post') + end + end + +end diff --git a/server/spec/helpers.rb b/server/spec/helpers.rb new file mode 100644 index 0000000..32841e5 --- /dev/null +++ b/server/spec/helpers.rb @@ -0,0 +1,39 @@ +module Helpers + + def increasing?(list) + comparisons(list).all? { |x| x && x <= 0 } + end + + def decreasing?(list) + comparisons(list).all? { |x| x && x >= 0 } + end + + def comparisons(list) + # compare each element with its successor, not only with the first element + list.each_cons(2).map do |x, y| + x <=> y + end + end + + def git_bare?(dir) + !File.exist?(File.join(dir, '.git')) + end + + def git_sha(dir) + if git_bare?(dir) + `cd '#{dir}' && cat "$(cut -d' ' -f2 HEAD)"`.strip + else + `cd '#{dir}' && git log -n1`.split[1].strip + end + end + + def git_reset_hard(dir, ref = nil) + if git_bare?(dir) + raise 'git_reset_hard does not support bare repos' + else + args = ref ? "'#{ref}'" : '' + `cd '#{dir}' && git reset --hard #{args}` + end + end + +end diff --git a/server/web_title_finder.rb b/server/web_title_finder.rb new file mode 100644 index 0000000..5103095 --- /dev/null +++ b/server/web_title_finder.rb @@ -0,0 +1,16 @@ +require 'htmlentities' +require 'open-uri' + +class WebTitleFinder + + def find_title(url) + body = URI.parse(url).open.read # open-uri via the parsed URI: never spawns "|cmd" pipes and still works where Kernel#open no longer fetches URLs + lines = body.split(/[\r\n]+/) + title_line = lines.grep(/<title/).first + html_title = title_line.gsub(/\s*<\/?title[^>]*>\s*/, '') + HTMLEntities.new.decode(html_title) + rescue + nil + end + +end