mirror of
https://github.com/samsonjs/samhuri.net.git
synced 2026-03-25 09:05:47 +00:00
60 lines
2.2 KiB
JavaScript
Executable file
60 lines
2.2 KiB
JavaScript
Executable file
#!/usr/bin/env node
|
|
|
|
var fs = require('fs')
|
|
, jsdom = require('jsdom')
|
|
, strftime = require('strftime').strftime
|
|
|
|
// Entry point: read the saved page named by the first command-line argument
// (or the default capture file when none is given) as UTF-8 and hand the
// markup to jsdom. The `scripts` option asks jsdom to load jQuery into the
// page; onLoad is invoked once that is done.
fs.readFile(process.argv[2] || 'sjs 301 moved permanently.html', 'utf8', function(err, html) {
  // Fail with the real I/O error (missing file, bad permissions, ...) rather
  // than passing an undefined `html` on to jsdom.
  if (err) throw err

  jsdom.env({ html: html
            , scripts: [ 'http://code.jquery.com/jquery-1.6.min.js' ]
            }, onLoad)
})
|
|
|
|
/**
 * Remove a leading Wayback Machine prefix
 * ("http(s)://web.archive.org/web/<digits>/") from a link, leaving the
 * address that was archived.
 *
 * @param {string|undefined|null} href - value of an href attribute; may be absent
 * @returns {string} the link without the archive prefix, or '' when there was no href
 */
function stripArchivePrefix(href) {
  return href ? href.replace(/^https?:\/\/web\.archive\.org\/web\/\d+\//, '') : ''
}

/**
 * jsdom callback: scrape a single blog post out of the loaded page, print a
 * short summary to stdout and write the post (header lines, a '----'
 * separator, then the HTML body) to ../recovered/<slug>.html.
 *
 * @param {*} err - error(s) reported by jsdom, if any
 * @param {Object} window - the jsdom window; must expose `jQuery`
 * @throws {Error} when the window/jQuery is unavailable, when the page has no
 *   ".entry-title a" link, or when the publish date cannot be parsed
 */
function onLoad(err, window) {
  if (!window || !window.jQuery) {
    // NOTE(review): depending on the jsdom version `err` may be a single error
    // or a list of them - confirm. concat() normalises both shapes.
    var reasons = [].concat(err || []).map(function(e) { return (e && e.message) || String(e) })
    throw new Error('could not load the page with jQuery' + (reasons.length ? ': ' + reasons.join('; ') : ''))
  }

  var $ = window.jQuery
    , titleLink = $('.entry-title a')
    , href = titleLink.attr('href')

  // Without a title link there is nothing sensible to name the output after.
  if (href == null) throw new Error('no ".entry-title a" link found; is this a blog post page?')

  var title = titleLink.text()
    , url = stripArchivePrefix(href)
    , isoDate = $('abbr.published').attr('title')
    , published = new Date(isoDate)
    , tags = $('ul.meta li:first-child a').map(function() { return $(this).text() }).get()

  // The date feeds the header lines and the file name, so refuse to continue
  // with one that did not parse.
  if (isNaN(published.getTime())) throw new Error('missing or unparseable publish date: ' + isoDate)

  console.log('title: ' + title)
  console.log('url: ' + url)
  console.log('iso date: ' + isoDate)
  console.log('tags: ' + tags)

  // Comments are collected onto the post object but are not written to the
  // output file below.
  var comments = $('li.comment').map(function() {
    var author = $('div.author > cite > span.author > *', this)
    return { author: author.text()
             // the author element may carry no href; that yields '' here
           , url: stripArchivePrefix(author.attr('href'))
           , date: $('div.author > abbr', this).attr('title')
           , body: $('div.content', this).text().trim()
           }
  }).get()

  var post = { title: title
             , url: url
             , ISODate: isoDate
             , body: $('.entry-content').html().trim()
             , tags: tags
             , comments: comments
             }
    , s = [ 'Title: ' + post.title
          , 'Date: ' + strftime('%B %e, %Y', published)
          , 'Timestamp: ' + strftime('%s', published)
          , 'Author: sjs'
          , 'Tags: ' + post.tags.join(', ')
          , '----'
          , ''
          , post.body
          , ''
          ].join('\n')
      // lower-case, drop everything except whitespace, a-z, 0-9, '.', '_' and '-',
      // then turn each whitespace run into a single '-'
    , slugTitle = post.title
        .toLowerCase()
        .replace(/[^\sa-z0-9._-]/g, '')
        .replace(/\s+/g, '-')
      // format the date on its own so the title never becomes part of the
      // strftime format string
    , slug = strftime('%Y.%m.%d', published) + '-' + slugTitle

  console.log('slug: ' + slug)
  fs.writeFileSync('../recovered/' + slug + '.html', s, 'utf8')
  console.log(post.title + ' (' + slug + '.html)')
  console.log()
}
|