use optparse, don't clobber existing hashes, add more options

This commit is contained in:
Sami Samhuri 2012-04-28 09:17:22 -07:00
parent 1e7dfe7721
commit 9417a54eae

190
scrub
View file

@ -1,38 +1,39 @@
#!/usr/bin/env ruby #!/usr/bin/env ruby
require 'json' require 'json'
require 'optparse'
require 'ostruct'
class Scrubber class Scrubber
class Result attr_reader :failures
attr_reader :status attr_reader :options
attr_reader :failures attr_reader :root_dir
attr_reader :status
def initialize(options) def self.scrub(root_dir, options = {})
@status = options[:status] new(root_dir, options).scrub
@failures = options[:failures] || []
end
def ok?
@status == :ok
end
end end
def initialize(root_dir) def initialize(root_dir, options = {})
@root_dir = File.realpath(root_dir)
@failures = [] @failures = []
@options = options
@root_dir = File.realpath(root_dir)
@status = :ok
end end
def scrub(dir = @root_dir) def scrub(dir = @root_dir)
return if File.exist?(File.join(dir, 'noscrub')) hash_file = hashes_filename(dir)
if File.exist?(File.join(dir, 'noscrub'))
if File.exists?(hash_file)
File.unlink(hash_file)
end
return self
end
# restore hashes if already scrubbed # restore hashes if already scrubbed
hashes = expected_hashes = hashes(dir)
if File.exist?(hash_filename(dir)) new_hashes = {}
JSON.parse(File.read(hash_filename(dir)))
else
{}
end
# walk the directory # walk the directory
Dir[File.join(dir, '*')].each do |file| Dir[File.join(dir, '*')].each do |file|
@ -47,68 +48,133 @@ class Scrubber
# scrub this file # scrub this file
else else
basename = File.basename(file) basename = File.basename(file)
expected_hash = expected_hashes[basename]
next if basename == 'scrub.json' next if basename == 'scrub.json'
if options.skip_existing && expected_hash
new_hashes[basename] = expected_hash
next
end
relative_filename = file.sub(@root_dir + '/', '') relative_filename = file.sub(@root_dir + '/', '')
hash = sha1(file) result, hash = scrub_file(file, expected_hash)
if expected_hash = hashes[basename] case result
unless hash == expected_hash when :ok
@failures << { new_hashes[basename] = hash
:filename => relative_filename, puts "[ok] #{hash} - #{relative_filename}" if options.verbose
:hash => hash, when :new
:expected_hash => expected_hash new_hashes[basename] = hash
} puts "[new] #{hash} - #{relative_filename}" if options.verbose
puts "!! #{hash} not ok: #{relative_filename}" when :fail
else @failures << {
puts " * #{hash} ok: #{relative_filename}" :filename => relative_filename,
end :hash => hash,
else :expected_hash => expected_hash
hashes[basename] = hash }
puts " * #{hash} new: #{relative_filename}" @status = :fail
puts "[FAIL] #{hash} - #{relative_filename} (previously had sha #{expected_hash})"
end end
end end
end end
# persist the hashes write_hashes(dir, new_hashes)
File.open(hash_filename(dir), 'w') { |f| f.puts(JSON.fast_generate(hashes)) } self
end
# build and return our result # Returns
@result = Result.new( def scrub_file(file, expected_hash)
:status => @failures.length == 0 ? :ok : :fail, basename = File.basename(file)
:failures => @failures hash = sha1(file)
) result =
if hash == expected_hash
result = :ok
elsif expected_hash
result = :fail
else
result = :new
end
[result, hash]
end
# Predicate: true while the recorded status is still :ok.
def ok?
  :ok == @status
end
def fail?
@status == :fail
end end
def sha1(filename) def sha1(filename)
`shasum "#{filename}"`.split.first `zsh -c "noglob shasum \\\"#{filename}\\\""`.split.first
end end
def hash_filename(dir) def hashes_filename(dir)
File.join(dir, 'scrub.json') File.join(dir, 'scrub.json')
end end
# Loads the previously stored hashes for +dir+.
#
# dir - directory whose hash file (as named by hashes_filename) is read
#
# Returns the parsed JSON contents of the hash file, or an empty Hash when
# the file does not exist.
def hashes(dir)
  path = hashes_filename(dir)
  return {} unless File.exist?(path)

  JSON.parse(File.read(path))
end
# Persists the hashes computed for +dir+ to that directory's hash file.
#
# dir    - directory whose hash file (as named by hashes_filename) is updated
# hashes - Hash of basename => checksum, serialized as JSON
#
# Does nothing when options.phantom is set. When +hashes+ is empty, any
# existing hash file is removed instead of writing an empty JSON object.
def write_hashes(dir, hashes)
  return if options.phantom

  path = hashes_filename(dir)
  if hashes.empty?
    # Nothing to record: drop a stale hash file rather than keep an empty one.
    # File.exist? is used because File.exists? was removed in Ruby 3.2.
    File.unlink(path) if File.exist?(path)
  else
    File.open(path, 'w') { |io| io.puts(JSON.generate(hashes)) }
  end
end
end end
def main def main
if root_dir = ARGV.shift options = OpenStruct.new
unless File.directory?(root_dir) options.phantom = false
puts "error: #{root_dir} is not directory" options.skip_existing = false
exit 1 options.verbose = false
end
result = Scrubber.new(root_dir).scrub OptionParser.new do |opts|
opts.banner = 'Usage: scrub [options] <root-directory>'
unless result.ok? opts.on('-h', '--help', 'Show this help') do
# report failures puts opts
result.failures.sort do |a,b| exit
a[:filename] <=> b[:filename] end
end.each do |failure| opts.on('-p', '--phantom', 'Do everything except write scrub.json files. Useful for testing.') do
puts "#{failure[:filename]}: expected #{failure[:expected_hash]}, but got #{failure[:hash]}" options.phantom = true
end end
exit 1 opts.on('-s', '--skip-existing', 'Only calculate new checksums, skipping files with existing hashes') do
options.skip_existing = true
end
opts.on('-v', '--verbose', 'Log every file that is checked') do
options.verbose = true
end
end.parse!
root_dir = ARGV.shift || '.'
unless File.directory?(root_dir)
puts "error: #{root_dir} is not directory"
exit 1
end
result = Scrubber.scrub(root_dir, options)
# TODO print a summary
# Failures may have been lost in the noise so report them at the
# end as well when -v is given.
if result.fail? && options.verbose
puts
puts "*** Failures:"
# report failures
result.failures.sort do |a,b|
a[:filename] <=> b[:filename]
end.each do |failure|
puts "#{failure[:filename]}: expected #{failure[:expected_hash]}, but got #{failure[:hash]}"
end end
else
puts 'Usage: scrub </your/precious/data>'
exit 1 exit 1
end end
end end