#!/usr/bin/env ruby

require 'digest'
require 'json'
require 'optparse'
require 'ostruct'

# Walks a directory tree and keeps, in every directory, a scrub.json file that
# maps each file's basename to { 'hash' => <sha1 hex>, 'timestamp' => <epoch> }.
# On later runs the stored hash is compared with the current one; a mismatch is
# recorded as a failure. A directory containing a file named 'noscrub' is
# skipped (together with everything below it) and loses its scrub.json.
class Scrubber
  SECONDS_PER_DAY = 86400
  # Files verified more recently than this many days ago are skipped unless
  # the `all` option is set.
  RESCRUB_AFTER_DAYS = 30

  attr_reader :failures
  attr_reader :options
  attr_reader :root_dir
  attr_reader :status

  # Convenience wrapper: builds a Scrubber, scrubs root_dir, returns the Scrubber.
  def self.scrub(root_dir, options = {})
    new(root_dir, options).scrub
  end

  # root_dir - directory to scrub; resolved with File.realpath.
  # options  - an object responding to #all, #phantom and #verbose (such as the
  #            OpenStruct built by main), or a Hash with those keys. A Hash is
  #            wrapped in an OpenStruct because the rest of the class reads the
  #            options through method calls; a bare Hash would raise
  #            NoMethodError on the first access.
  def initialize(root_dir, options = {})
    @failures = []
    @options = options.is_a?(Hash) ? OpenStruct.new(options) : options
    @root_dir = File.realpath(root_dir)
    @status = :ok
  end

  # Scrubs dir (default: the root directory) and, recursively, its
  # subdirectories. Returns self so callers can query #ok?, #fail? and
  # #failures.
  def scrub(dir = @root_dir)
    if File.exist?(File.join(dir, 'noscrub'))
      # Opted out: drop any stale records. Going through write_file_records
      # keeps --phantom runs from touching the disk here as well.
      write_file_records(dir, {})
      return self
    end

    # restore file records if already scrubbed
    existing_file_records = file_records(dir)
    new_file_records = {}

    visible_entries(dir).each do |entry|
      file = File.join(dir, entry)

      # skip broken symlinks
      next if File.symlink?(file) && !File.exist?(file)

      if File.directory?(file)
        scrub(file)
      elsif entry != 'scrub.json'
        new_file_records[entry] = scrub_entry(dir, file, existing_file_records[entry] || {})
      end
    end

    write_file_records(dir, new_file_records)
    self
  end

  # Hashes file and compares the result with expected_hash.
  #
  # Returns [result, hash] where result is
  #   :new  - there was no expected hash to compare against,
  #   :ok   - the current hash equals expected_hash,
  #   :fail - the current hash differs from expected_hash.
  # hash is nil when the file could not be read (see #sha1).
  def scrub_file(file, expected_hash)
    hash = sha1(file)
    result =
      if !expected_hash
        :new
      elsif hash == expected_hash
        :ok
      else
        :fail
      end
    [result, hash]
  end

  def ok?
    @status == :ok
  end

  def fail?
    @status == :fail
  end

  # Returns the hex SHA-1 of filename, or nil if it cannot be read.
  #
  # Computed in-process: interpolating the name into a shell command line is
  # unsafe for names containing quotes or backticks.
  def sha1(filename)
    Digest::SHA1.file(filename).hexdigest
  rescue SystemCallError, IOError => e
    warn "scrub: cannot read #{filename}: #{e.message}"
    nil
  end

  # Path of the records file belonging to dir.
  def scrub_filename(dir)
    File.join(dir, 'scrub.json')
  end

  # Returns the parsed contents of dir's scrub.json, or {} if there is none.
  def file_records(dir)
    path = scrub_filename(dir)
    File.exist?(path) ? JSON.parse(File.read(path)) : {}
  end

  # Writes records to dir's scrub.json, or removes that file when records is
  # empty. Does nothing at all in phantom mode.
  def write_file_records(dir, records)
    return if options.phantom

    path = scrub_filename(dir)
    if records.empty?
      File.unlink(path) if File.exist?(path)
    else
      File.open(path, 'w') { |io| io.puts(JSON.generate(records)) }
    end
  end

  private

  # Names of the non-hidden entries of dir, sorted. This is what a '*' glob
  # would match, but the directory name is never interpreted as a pattern, so
  # directories whose names contain '[', '{', '*' or '?' are handled too.
  def visible_entries(dir)
    Dir.entries(dir).reject { |entry| entry.start_with?('.') }.sort
  rescue SystemCallError => e
    warn "scrub: cannot list #{dir}: #{e.message}"
    []
  end

  # file's path relative to the root directory (used for reporting only).
  def relative_path(file)
    prefix = @root_dir.end_with?('/') ? @root_dir : "#{@root_dir}/"
    file.start_with?(prefix) ? file[prefix.length..-1] : file
  end

  # Verifies a single regular file and returns the record to store for it.
  #
  # dir         - directory containing the file.
  # file        - full path of the file.
  # file_record - previously stored record: a Hash, or a bare hash String in
  #               the old scrub.json format, or {} for an unknown file.
  def scrub_entry(dir, file, file_record)
    now = Time.now.to_i

    # convert old scrub.json records
    if file_record.is_a?(String)
      puts ">>> converting #{file} in #{scrub_filename(dir)} to new disk format" if options.verbose
      # even out the scrubbing load throughout the month
      random_timestamp = now - (rand(RESCRUB_AFTER_DAYS) * SECONDS_PER_DAY)
      file_record = { 'hash' => file_record, 'timestamp' => random_timestamp }
    end

    last_scrubbed = file_record['timestamp'] || 0

    # skip files scrubbed in the last 30 days, unless --all was given
    if !options.all && last_scrubbed >= now - (RESCRUB_AFTER_DAYS * SECONDS_PER_DAY)
      puts ">>> skipping #{file} as it has been scrubbed recently (#{Time.at(last_scrubbed)})" if options.verbose
      return file_record
    end

    relative_filename = relative_path(file)
    result, hash = scrub_file(file, file_record['hash'])

    case result
    when :ok
      file_record['hash'] = hash
      puts "[ok] #{hash} - #{relative_filename}" if options.verbose
    when :new
      file_record['hash'] = hash
      puts "[new] #{hash} - #{relative_filename}" if options.verbose
    when :fail
      # no change in scrub.json, just report the new sha
      @failures << {
        :filename => relative_filename,
        :hash => hash,
        :expected_hash => file_record['hash']
      }
      @status = :fail
      puts "[FAIL] #{hash} - #{relative_filename} (previously had sha #{file_record['hash']})"
    end

    file_record['timestamp'] = Time.now.to_i
    file_record
  end
end

# Command-line entry point: parses the options, scrubs the directory given as
# the first argument (default '.'), and exits with status 1 when any file
# failed verification.
def main
  options = OpenStruct.new
  options.all = false
  options.phantom = false
  options.verbose = false

  OptionParser.new do |opts|
    opts.banner = 'Usage: scrub [options] '

    opts.on('-a', '--all', 'Scrub all files no matter when they were last scrubbed.') do
      options.all = true
    end

    opts.on('-h', '--help', 'Show this help') do
      puts opts
      exit
    end

    opts.on('-p', '--phantom', 'Do everything except write scrub.json files. Useful for testing.') do
      options.phantom = true
    end

    opts.on('-v', '--verbose', 'Log every file that is checked') do
      options.verbose = true
    end
  end.parse!

  root_dir = ARGV.shift || '.'
  unless File.directory?(root_dir)
    puts "error: #{root_dir} is not directory"
    exit 1
  end

  result = Scrubber.scrub(root_dir, options)
  return unless result.fail?

  # TODO print a summary

  # Failures may have been lost in the noise so report them at the
  # end as well when -v is given.
  if options.verbose
    puts
    puts "*** Failures:"
    result.failures.sort_by { |failure| failure[:filename] }.each do |failure|
      puts "#{failure[:filename]}: expected #{failure[:expected_hash]}, but got #{failure[:hash]}"
    end
  end

  # A detected failure is reflected in the exit status whether or not -v was
  # given, so unattended runs can notice it.
  exit 1
end

main if $0 == __FILE__