diff --git a/sort-redis-keys b/sort-redis-keys new file mode 100755 index 0000000..4d62b07 --- /dev/null +++ b/sort-redis-keys @@ -0,0 +1,86 @@ +#!/usr/bin/env ruby + +require 'csv' + +def darwin? + @darwin ||= `uname`.strip == 'Darwin' +end + +def mktemp + if darwin? + `mktemp -t $$`.strip + else + `mktemp`.strip + end +end + +def delimit(n, delimiter = ',') + n.to_s.gsub(/(\d)(?=(\d\d\d)+(?!\d))/, "\\1#{delimiter}") +end + +Units = %w[KB MB GB TB] + +def human_size(n) + unit = 'bytes' + units = Units.dup + while n > 1024 && units.length > 0 + n /= 1024.0 + unit = units.shift + end + "#{delimit(n.round(2))} #{unit}" +end + +def analyze_rdb(rdbfile, keyfile) + size = human_size(File.stat(rdbfile).size) + puts "Getting key info for #{rdbfile} (#{size}), this can take a while..." + `rdb -c memory #{rdbfile} > #{keyfile}` + keyfile +end + +def sort_keys(rdbfile) + keyfile = rdbfile.sub('rdb', 'csv') + sortedkeyfile = rdbfile.sub('.rdb', '-sorted.csv') + if !File.exists?(keyfile) + analyze_rdb(rdbfile, keyfile) + end + lines = File.readlines(keyfile) + lines.shift # drop the header + n = lines.length + print "Munging #{delimit(n)} rows..." + rows = [] + while line = lines.shift + row = line.strip.split(',') + size = row[3].to_i + key = row[2].sub(/^"/, '').sub(/"$/, '') + type = row[1] + num_elements = row[5] + len_largest = row[6] + row = [size, key, type, num_elements, len_largest] + rows << row + if rows.length % (n / 50) == 0 + print '.' + end + end + puts + puts "Sorting..." + rows = rows.sort { |a,b| b[0] <=> a[0] } + print "Writing..." + CSV.open(sortedkeyfile, 'wb') do |csv| + # header + csv << %w[size key type num_elements len_largest_element] + while row = rows.shift + csv << row + if rows.length % (n / 50) == 0 + print '.' + end + end + puts + puts "Done." + end +end + +def main + sort_keys(ARGV.shift) +end + +main if $0 == __FILE__