196 lines
5 KiB
Ruby
Executable file
196 lines
5 KiB
Ruby
Executable file
#!/usr/bin/env ruby
|
|
|
|
require 'digest'
require 'json'
require 'optparse'
require 'ostruct'
|
|
|
|
# Verifies file integrity across a directory tree.
#
# Each scrubbed directory gets a scrub.json file that maps the basename of
# every file in it to a record of the form
#   { 'hash' => <SHA-1 hex digest>, 'timestamp' => <epoch seconds of last scrub> }
# On later runs the digest is recomputed and compared with the recorded one;
# mismatches are collected in #failures and flip #status to :fail.
#
# A directory containing a file named 'noscrub' is skipped entirely (it is not
# descended into) and any scrub.json inside it is removed.
class Scrubber
  SECONDS_PER_DAY = 86_400

  # Files scrubbed within this many days are skipped unless options.all is set.
  SCRUB_INTERVAL_DAYS = 30

  attr_reader :failures
  attr_reader :options
  attr_reader :root_dir
  attr_reader :status

  # Convenience wrapper: builds a Scrubber for root_dir, scrubs it and returns
  # the Scrubber so the caller can inspect #ok?, #fail? and #failures.
  def self.scrub(root_dir, options = {})
    new(root_dir, options).scrub
  end

  # root_dir - directory to scrub; resolved with File.realpath.
  # options  - object responding to #all, #phantom and #verbose (for example
  #            an OpenStruct). A plain Hash is accepted too and is wrapped in
  #            an OpenStruct so that missing keys simply read as nil.
  def initialize(root_dir, options = {})
    @failures = []
    @options = options.is_a?(Hash) ? OpenStruct.new(options) : options
    @root_dir = File.realpath(root_dir)
    @status = :ok
  end

  # Scrubs dir (default: the root directory) and, recursively, every
  # subdirectory. Returns self.
  def scrub(dir = @root_dir)
    if File.exist?(File.join(dir, 'noscrub'))
      remove_file_records(dir)
      return self
    end

    # records from a previous run, if this directory was scrubbed before
    existing_file_records = file_records(dir)
    new_file_records = {}

    Dir[File.join(dir, '*')].each do |file|
      # skip broken symlinks
      next if File.symlink?(file) && !File.exist?(file)

      if File.directory?(file)
        scrub(file)
        next
      end

      basename = File.basename(file)
      next if basename == 'scrub.json'

      new_file_records[basename] =
        scrub_entry(dir, file, existing_file_records[basename] || {})
    end

    # Only files seen in this pass are written back, so records of files that
    # no longer exist are dropped.
    write_file_records(dir, new_file_records)
    self
  end

  # Hashes file and compares the digest with expected_hash.
  #
  # Returns [result, hash] where result is
  #   :ok   - the digest equals expected_hash
  #   :fail - expected_hash is set and differs from the digest
  #   :new  - there is no expected_hash yet
  def scrub_file(file, expected_hash)
    hash = sha1(file)
    result =
      if hash == expected_hash
        :ok
      elsif expected_hash
        :fail
      else
        :new
      end
    [result, hash]
  end

  def ok?
    @status == :ok
  end

  def fail?
    @status == :fail
  end

  # Returns the SHA-1 hex digest of filename, or nil (after printing a warning
  # to stderr) when the file cannot be read. The digest is computed in-process
  # so that no shell ever sees the filename.
  def sha1(filename)
    Digest::SHA1.file(filename).hexdigest
  rescue SystemCallError => e
    warn "scrub: #{filename}: #{e.message}"
    nil
  end

  # Path of the record file belonging to dir.
  def scrub_filename(dir)
    File.join(dir, 'scrub.json')
  end

  # Returns the parsed scrub.json of dir, or an empty Hash when there is none.
  def file_records(dir)
    f = scrub_filename(dir)
    File.exist?(f) ? JSON.parse(File.read(f)) : {}
  end

  # Writes records to the scrub.json of dir, or removes that file when there
  # is nothing to record. Does nothing in phantom mode.
  def write_file_records(dir, records)
    return if options.phantom
    return remove_file_records(dir) if records.empty?

    # JSON.generate emits the same compact single-line document.
    File.open(scrub_filename(dir), 'w') { |io| io.puts(JSON.generate(records)) }
  end

  private

  # Deletes the scrub.json of dir if present. Phantom mode never touches
  # record files, so this is a no-op there.
  def remove_file_records(dir)
    return if options.phantom

    f = scrub_filename(dir)
    File.unlink(f) if File.exist?(f)
  end

  # Scrubs a single file and returns its (possibly updated) record.
  def scrub_entry(dir, file, file_record)
    file_record = convert_old_record(dir, file, file_record) if file_record.is_a?(String)

    last_scrubbed = file_record['timestamp'] || 0
    # skip files scrubbed in the last 30 days, unless --all was given
    if !options.all && last_scrubbed >= Time.now.to_i - (SCRUB_INTERVAL_DAYS * SECONDS_PER_DAY)
      puts ">>> skipping #{file} as it has been scrubbed recently (#{Time.at(last_scrubbed)})" if options.verbose
      return file_record
    end

    relative_filename = file.sub(@root_dir + '/', '')
    expected_hash = file_record['hash']
    result, hash = scrub_file(file, expected_hash)

    case result
    when :ok, :new
      file_record['hash'] = hash
      puts "[#{result}] #{hash} - #{relative_filename}" if options.verbose
    when :fail
      # the recorded hash is kept as is; only the new sha is reported
      @failures << {
        :filename => relative_filename,
        :hash => hash,
        :expected_hash => expected_hash
      }
      @status = :fail
      puts "[FAIL] #{hash} - #{relative_filename} (previously had sha #{expected_hash})"
    end

    # The timestamp is refreshed for every outcome, including :fail.
    file_record['timestamp'] = Time.now.to_i
    file_record
  end

  # Converts an old-format record (a bare hash String) into the current
  # { 'hash', 'timestamp' } form.
  def convert_old_record(dir, file, old_hash)
    puts ">>> converting #{file} in #{scrub_filename(dir)} to new disk format" if options.verbose
    # even out the scrubbing load throughout the month
    random_timestamp = Time.now.to_i - (rand(SCRUB_INTERVAL_DAYS) * SECONDS_PER_DAY)
    { 'hash' => old_hash, 'timestamp' => random_timestamp }
  end
end
|
|
|
|
# Command-line entry point.
#
# Parses the options from ARGV, scrubs the given root directory (default: the
# current directory) and exits with status 1 when the directory is invalid,
# the options cannot be parsed, or any file failed its scrub. With -v the
# failures are listed once more at the end, sorted by filename.
def main
  options = OpenStruct.new
  options.all = false
  options.phantom = false
  options.verbose = false

  parser = OptionParser.new do |opts|
    opts.banner = 'Usage: scrub [options] <root-directory>'
    opts.on('-a', '--all', 'Scrub all files no matter when they were last scrubbed.') do
      options.all = true
    end
    opts.on('-h', '--help', 'Show this help') do
      puts opts
      exit
    end
    opts.on('-p', '--phantom', 'Do everything except write scrub.json files. Useful for testing.') do
      options.phantom = true
    end
    opts.on('-v', '--verbose', 'Log every file that is checked') do
      options.verbose = true
    end
  end

  begin
    parser.parse!
  rescue OptionParser::ParseError => e
    # report bad options as a usage error instead of a stack trace
    warn "error: #{e.message}"
    warn parser.help
    exit 1
  end

  root_dir = ARGV.shift || '.'
  unless File.directory?(root_dir)
    warn "error: #{root_dir} is not a directory"
    exit 1
  end

  result = Scrubber.scrub(root_dir, options)

  # TODO print a summary

  return unless result.fail?

  # Failures may have been lost in the noise so report them at the
  # end as well when -v is given.
  if options.verbose
    puts
    puts "*** Failures:"
    result.failures.sort_by { |failure| failure[:filename] }.each do |failure|
      puts "#{failure[:filename]}: expected #{failure[:expected_hash]}, but got #{failure[:hash]}"
    end
  end

  # The exit status signals failure whether or not -v was given.
  exit 1
end
|
|
|
|
# Run the command-line entry point only when this file is executed directly.
main if __FILE__ == $PROGRAM_NAME
|