use optparse, don't clobber existing hashes, add more options
This commit is contained in:
parent
1e7dfe7721
commit
9417a54eae
1 changed file with 128 additions and 62 deletions
190
scrub
190
scrub
|
|
@ -1,38 +1,39 @@
|
||||||
#!/usr/bin/env ruby
|
#!/usr/bin/env ruby
|
||||||
|
|
||||||
require 'json'
|
require 'json'
|
||||||
|
require 'optparse'
|
||||||
|
require 'ostruct'
|
||||||
|
|
||||||
class Scrubber
|
class Scrubber
|
||||||
|
|
||||||
class Result
|
attr_reader :failures
|
||||||
attr_reader :status
|
attr_reader :options
|
||||||
attr_reader :failures
|
attr_reader :root_dir
|
||||||
|
attr_reader :status
|
||||||
|
|
||||||
def initialize(options)
|
def self.scrub(root_dir, options = {})
|
||||||
@status = options[:status]
|
new(root_dir, options).scrub
|
||||||
@failures = options[:failures] || []
|
|
||||||
end
|
|
||||||
|
|
||||||
def ok?
|
|
||||||
@status == :ok
|
|
||||||
end
|
|
||||||
end
|
end
|
||||||
|
|
||||||
def initialize(root_dir)
|
def initialize(root_dir, options = {})
|
||||||
@root_dir = File.realpath(root_dir)
|
|
||||||
@failures = []
|
@failures = []
|
||||||
|
@options = options
|
||||||
|
@root_dir = File.realpath(root_dir)
|
||||||
|
@status = :ok
|
||||||
end
|
end
|
||||||
|
|
||||||
def scrub(dir = @root_dir)
|
def scrub(dir = @root_dir)
|
||||||
return if File.exist?(File.join(dir, 'noscrub'))
|
hash_file = hashes_filename(dir)
|
||||||
|
if File.exist?(File.join(dir, 'noscrub'))
|
||||||
|
if File.exists?(hash_file)
|
||||||
|
File.unlink(hash_file)
|
||||||
|
end
|
||||||
|
return self
|
||||||
|
end
|
||||||
|
|
||||||
# restore hashes if already scrubbed
|
# restore hashes if already scrubbed
|
||||||
hashes =
|
expected_hashes = hashes(dir)
|
||||||
if File.exist?(hash_filename(dir))
|
new_hashes = {}
|
||||||
JSON.parse(File.read(hash_filename(dir)))
|
|
||||||
else
|
|
||||||
{}
|
|
||||||
end
|
|
||||||
|
|
||||||
# walk the directory
|
# walk the directory
|
||||||
Dir[File.join(dir, '*')].each do |file|
|
Dir[File.join(dir, '*')].each do |file|
|
||||||
|
|
@ -47,68 +48,133 @@ class Scrubber
|
||||||
# scrub this file
|
# scrub this file
|
||||||
else
|
else
|
||||||
basename = File.basename(file)
|
basename = File.basename(file)
|
||||||
|
expected_hash = expected_hashes[basename]
|
||||||
next if basename == 'scrub.json'
|
next if basename == 'scrub.json'
|
||||||
|
if options.skip_existing && expected_hash
|
||||||
|
new_hashes[basename] = expected_hash
|
||||||
|
next
|
||||||
|
end
|
||||||
relative_filename = file.sub(@root_dir + '/', '')
|
relative_filename = file.sub(@root_dir + '/', '')
|
||||||
hash = sha1(file)
|
result, hash = scrub_file(file, expected_hash)
|
||||||
if expected_hash = hashes[basename]
|
case result
|
||||||
unless hash == expected_hash
|
when :ok
|
||||||
@failures << {
|
new_hashes[basename] = hash
|
||||||
:filename => relative_filename,
|
puts "[ok] #{hash} - #{relative_filename}" if options.verbose
|
||||||
:hash => hash,
|
when :new
|
||||||
:expected_hash => expected_hash
|
new_hashes[basename] = hash
|
||||||
}
|
puts "[new] #{hash} - #{relative_filename}" if options.verbose
|
||||||
puts "!! #{hash} not ok: #{relative_filename}"
|
when :fail
|
||||||
else
|
@failures << {
|
||||||
puts " * #{hash} ok: #{relative_filename}"
|
:filename => relative_filename,
|
||||||
end
|
:hash => hash,
|
||||||
else
|
:expected_hash => expected_hash
|
||||||
hashes[basename] = hash
|
}
|
||||||
puts " * #{hash} new: #{relative_filename}"
|
@status = :fail
|
||||||
|
puts "[FAIL] #{hash} - #{relative_filename} (previously had sha #{expected_hash})"
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
# persist the hashes
|
write_hashes(dir, new_hashes)
|
||||||
File.open(hash_filename(dir), 'w') { |f| f.puts(JSON.fast_generate(hashes)) }
|
self
|
||||||
|
end
|
||||||
|
|
||||||
# build and return our result
|
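# Returns [result, hash]: hash is sha1(file); result is :ok when it equals expected_hash, :fail when an expected_hash exists but differs, and :new when there is no expected_hash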
|
||||||
@result = Result.new(
|
def scrub_file(file, expected_hash)
|
||||||
:status => @failures.length == 0 ? :ok : :fail,
|
basename = File.basename(file)
|
||||||
:failures => @failures
|
hash = sha1(file)
|
||||||
)
|
result =
|
||||||
|
if hash == expected_hash
|
||||||
|
result = :ok
|
||||||
|
elsif expected_hash
|
||||||
|
result = :fail
|
||||||
|
else
|
||||||
|
result = :new
|
||||||
|
end
|
||||||
|
[result, hash]
|
||||||
|
end
|
||||||
|
|
||||||
|
def ok?
|
||||||
|
@status == :ok
|
||||||
|
end
|
||||||
|
|
||||||
|
def fail?
|
||||||
|
@status == :fail
|
||||||
end
|
end
|
||||||
|
|
||||||
def sha1(filename)
|
def sha1(filename)
|
||||||
`shasum "#{filename}"`.split.first
|
`zsh -c "noglob shasum \\\"#{filename}\\\""`.split.first
|
||||||
end
|
end
|
||||||
|
|
||||||
def hash_filename(dir)
|
def hashes_filename(dir)
|
||||||
File.join(dir, 'scrub.json')
|
File.join(dir, 'scrub.json')
|
||||||
end
|
end
|
||||||
|
|
||||||
|
def hashes(dir)
|
||||||
|
f = hashes_filename(dir)
|
||||||
|
if File.exist?(f)
|
||||||
|
JSON.parse(File.read(f))
|
||||||
|
else
|
||||||
|
{}
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
|
def write_hashes(dir, hashes)
|
||||||
|
return if options.phantom
|
||||||
|
f = hashes_filename(dir)
|
||||||
|
if hashes.size > 0
|
||||||
|
File.open(f, 'w') { |f| f.puts(JSON.fast_generate(hashes)) }
|
||||||
|
elsif File.exists?(f)
|
||||||
|
File.unlink(f)
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
end
|
end
|
||||||
|
|
||||||
|
|
||||||
def main
|
def main
|
||||||
if root_dir = ARGV.shift
|
options = OpenStruct.new
|
||||||
unless File.directory?(root_dir)
|
options.phantom = false
|
||||||
puts "error: #{root_dir} is not directory"
|
options.skip_existing = false
|
||||||
exit 1
|
options.verbose = false
|
||||||
end
|
|
||||||
|
|
||||||
result = Scrubber.new(root_dir).scrub
|
OptionParser.new do |opts|
|
||||||
|
opts.banner = 'Usage: scrub [options] <root-directory>'
|
||||||
unless result.ok?
|
opts.on('-h', '--help', 'Show this help') do
|
||||||
# report failures
|
puts opts
|
||||||
result.failures.sort do |a,b|
|
exit
|
||||||
a[:filename] <=> b[:filename]
|
end
|
||||||
end.each do |failure|
|
opts.on('-p', '--phantom', 'Do everything except write scrub.json files. Useful for testing.') do
|
||||||
puts "#{failure[:filename]}: expected #{failure[:expected_hash]}, but got #{failure[:hash]}"
|
options.phantom = true
|
||||||
end
|
end
|
||||||
exit 1
|
opts.on('-s', '--skip-existing', 'Only calculate new checksums, skipping files with existing hashes') do
|
||||||
|
options.skip_existing = true
|
||||||
|
end
|
||||||
|
opts.on('-v', '--verbose', 'Log every file that is checked') do
|
||||||
|
options.verbose = true
|
||||||
|
end
|
||||||
|
end.parse!
|
||||||
|
|
||||||
|
root_dir = ARGV.shift || '.'
|
||||||
|
unless File.directory?(root_dir)
|
||||||
|
puts "error: #{root_dir} is not directory"
|
||||||
|
exit 1
|
||||||
|
end
|
||||||
|
|
||||||
|
result = Scrubber.scrub(root_dir, options)
|
||||||
|
|
||||||
|
# TODO print a summary
|
||||||
|
|
||||||
|
# Failures may have been lost in the noise so report them at the
|
||||||
|
# end as well when -v is given.
|
||||||
|
if result.fail? && options.verbose
|
||||||
|
puts
|
||||||
|
puts "*** Failures:"
|
||||||
|
# report failures
|
||||||
|
result.failures.sort do |a,b|
|
||||||
|
a[:filename] <=> b[:filename]
|
||||||
|
end.each do |failure|
|
||||||
|
puts "#{failure[:filename]}: expected #{failure[:expected_hash]}, but got #{failure[:hash]}"
|
||||||
end
|
end
|
||||||
else
|
|
||||||
puts 'Usage: scrub </your/precious/data>'
|
|
||||||
exit 1
|
exit 1
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue