mirror of
https://github.com/1SecondEveryday/image-analysis-eval.git
synced 2026-03-25 09:05:49 +00:00
Add an untested script to run batches in parallel on runpod
This commit is contained in:
parent
0848b43304
commit
aa2b7abc2f
1 changed files with 137 additions and 0 deletions
137
runpod-parallel.rb
Executable file
137
runpod-parallel.rb
Executable file
|
|
@ -0,0 +1,137 @@
|
|||
#!/usr/bin/env ruby -w
|
||||
# Ruby 3.4 script: parallel batch inference across multiple RunPod VMs
|
||||
|
||||
require 'fileutils'
require 'json'
require 'open3'
require 'shellwords'
require 'thread'
|
||||
|
||||
# ─── CONFIG ────────────────────────────────────────────────────────────────
|
||||
|
||||
# Script-wide settings. They are read concurrently by every worker thread, so
# each value is frozen to rule out accidental mutation.
BATCH_DIR     = ARGV.fetch(0, './images').freeze    # local directory of your images
RESULTS_DIR   = ARGV.fetch(1, './results').freeze   # where to save each combo's results
# NOTE(review): the original comment says this image ships Ruby & Ollama — confirm.
CONTAINER_IMG = 'runpod/pytorch:2.1-cuda11.8'.freeze
# NOTE(review): not referenced by the code visible in this file.
SSH_USER      = 'root'.freeze                       # default for RunPod pods
|
||||
|
||||
# Define your model-prompt combos here:
|
||||
# One entry per pod to launch. Each hash carries:
#   name          - label used in the pod name and the results file name
#   model         - value passed to run_batch.rb via --model
#   system_prompt - value passed via --system-prompt
#   user_prompt   - value passed via --user-prompt
#   temperature   - value passed via --temperature
#   gpu_type      - value passed to `runpodctl create` via --gpuType
#
# The list is shared by all worker threads, so the array, every hash and every
# value is frozen.
COMBOS = [
  {
    name:          'llava7b',
    model:         'llava:7b',
    system_prompt: 'You are an image tagger.',
    user_prompt:   'Describe objects and mood.',
    temperature:   0.7,
    gpu_type:      'RTX 3090'
  },
  {
    name:          'llava13b',
    model:         'llava:13b',
    system_prompt: 'You are an image tagger.',
    user_prompt:   'Describe objects and mood.',
    temperature:   0.7,
    gpu_type:      'RTX 4090'
  },
  {
    name:          'gemma7b',
    model:         'gemma:7b',
    system_prompt: 'You are an image tagger.',
    user_prompt:   'Describe objects and mood.',
    temperature:   0.7,
    gpu_type:      'RTX 3090'
  }
].each { |combo| combo.each_value(&:freeze).freeze }.freeze
|
||||
|
||||
# ─── UTILITY METHODS ───────────────────────────────────────────────────────
|
||||
|
||||
# Echoes +cmd+ to stdout, then runs it via Kernel#system.
#
# @param cmd [String] command line to execute
# @return [nil] when the command succeeds
# @raise [RuntimeError] when the command exits non-zero or cannot be started;
#   the message carries the process status so failures from parallel workers
#   can be told apart
def run_cmd(cmd)
  puts "▶ #{cmd}"
  return if system(cmd)

  # $? is thread-local, so it describes the command run just above even when
  # several workers call run_cmd at the same time.
  raise "Command failed: #{cmd} (#{$?})"
end
|
||||
|
||||
# Runs +cmd+, captures its output and parses stdout as JSON.
#
# @param cmd [String] command line to execute
# @return [Object] the parsed JSON document (Hash, Array, ...)
# @raise [RuntimeError] when the command exits unsuccessfully; the message
#   includes whatever the command wrote to stderr
# @raise [JSON::ParserError] when stdout is not valid JSON
def capture_json(cmd)
  # capture3 yields THREE values; destructuring only two would bind the
  # stderr String to the status variable and break the success? check.
  stdout, stderr, status = Open3.capture3(cmd)
  unless status.success?
    detail = stderr.strip
    raise(detail.empty? ? "Failed JSON cmd: #{cmd}" : "Failed JSON cmd: #{cmd}\n#{detail}")
  end

  JSON.parse(stdout)
end
|
||||
|
||||
# Polls `runpodctl get pod` until the pod's "status" field reads Running.
#
# @param pod_id [String] identifier of the pod to poll
# @param timeout [Numeric] maximum number of seconds to wait before giving up
# @param interval [Numeric] seconds to sleep between polls
# @return [nil] once the pod is running
# @raise [RuntimeError] when the pod is still not running after +timeout+
#   seconds (previously the loop would spin forever on a pod that never starts)
def wait_for_pod(pod_id, timeout: 900, interval: 5)
  # Monotonic clock: unaffected by wall-clock adjustments during the wait.
  deadline = Process.clock_gettime(Process::CLOCK_MONOTONIC) + timeout

  loop do
    status = capture_json("runpodctl get pod #{pod_id} -o json")['status']
    # Compared case-insensitively so both "Running" and "RUNNING" are accepted.
    # NOTE(review): the exact casing emitted by runpodctl is not visible here.
    break if status.to_s.casecmp?('running')

    if Process.clock_gettime(Process::CLOCK_MONOTONIC) >= deadline
      raise "Timed out after #{timeout}s waiting for pod #{pod_id} (last status: #{status.inspect})"
    end

    sleep interval
  end
end
|
||||
|
||||
# Looks up the address of a pod from `runpodctl get pod` output.
#
# Reads the "publicIp" field first and falls back to "ip". Blank values are
# treated as missing, so an empty string is never returned as an address.
#
# @param pod_id [String] identifier of the pod to inspect
# @return [Object] the first non-blank value of "publicIp" / "ip"
# @raise [RuntimeError] when neither field holds a non-blank value
def public_ip(pod_id)
  info = capture_json("runpodctl get pod #{pod_id} -o json")
  address = info.values_at('publicIp', 'ip').find { |value| !value.to_s.strip.empty? }
  address || raise("No IP for pod #{pod_id}")
end
|
||||
|
||||
# ─── WORKER ─────────────────────────────────────────────────────────────────
|
||||
|
||||
def process_combo(combo)
|
||||
pod_info = capture_json(
|
||||
%W[
|
||||
runpodctl create pods
|
||||
--name batch-#{combo[:name]}
|
||||
--gpuType #{combo[:gpu_type]}
|
||||
--imageName #{CONTAINER_IMG}
|
||||
--containerDiskSize 10
|
||||
--volumeSize 50
|
||||
--ports '22/tcp' \
|
||||
--args "bash -lc '\
|
||||
apt update && \
|
||||
DEBIAN_FRONTEND=noninteractive apt install -y openssh-server && \
|
||||
mkdir -p /root/.ssh && \
|
||||
echo \"$SSH_PUB\" > /root/.ssh/authorized_keys && \
|
||||
chmod 700 /root/.ssh && chmod 600 /root/.ssh/authorized_keys && \
|
||||
service ssh start && \
|
||||
sleep infinity\
|
||||
'"
|
||||
-o json
|
||||
].join(' ')
|
||||
)
|
||||
|
||||
pod_id = pod_info.fetch('podId')
|
||||
ip = public_ip(pod_id)
|
||||
puts "▶ #{combo[:name]} p̶o̶d̶ #{pod_id} @ #{ip}"
|
||||
|
||||
wait_for_pod(pod_id)
|
||||
puts "✔ #{combo[:name]} pod ready"
|
||||
|
||||
# send images & run script
|
||||
run_cmd "runpodctl send #{BATCH_DIR} --podId #{pod_id}"
|
||||
run_cmd "runpodctl send run_batch.rb --podId #{pod_id}"
|
||||
|
||||
# execute remotely via SSH (assumes SSH key already added to runpod)
|
||||
# after pod is Running…
|
||||
ssh_base = capture_json("runpodctl get pod #{pod_id} -o json")["sshInfo"]
|
||||
# build your remote command
|
||||
remote = %Q{cd /workspace && \
|
||||
ruby run_batch.rb \
|
||||
--model #{combo[:model]} \
|
||||
--system-prompt #{combo[:system_prompt].dump} \
|
||||
--user-prompt #{combo[:user_prompt].dump} \
|
||||
--temperature #{combo[:temperature]} \
|
||||
--output results-#{combo[:name]}.json}
|
||||
# combine and run
|
||||
run_cmd "#{ssh_base} -o StrictHostKeyChecking=no -- #{remote}"
|
||||
|
||||
# fetch results
|
||||
run_cmd "runpodctl receive #{pod_id} --remotePath /workspace/results-#{combo[:name]}.json --localPath #{RESULTS_DIR}/results-#{combo[:name]}.json"
|
||||
|
||||
# clean up
|
||||
run_cmd "runpodctl remove pod #{pod_id}"
|
||||
puts "✅ #{combo[:name]} done"
|
||||
end
|
||||
|
||||
# ─── MAIN ───────────────────────────────────────────────────────────────────
|
||||
|
||||
# Create the results directory, including missing parent directories.
FileUtils.mkdir_p(RESULTS_DIR)

# One worker thread per combo. Each thread returns nil on success or the
# exception it hit, so a single failure cannot abort the main thread while the
# other workers are still running (and still owning pods).
workers = COMBOS.map do |combo|
  Thread.new(combo) do |job|
    process_combo(job)
    nil
  rescue StandardError => e
    e
  end
end

# Thread#value joins the thread and yields the block's result.
failures = COMBOS.zip(workers.map(&:value)).reject { |_combo, error| error.nil? }

if failures.empty?
  puts "All batches complete. Results in #{RESULTS_DIR}/"
else
  failures.each do |combo, error|
    warn "✖ #{combo[:name]} failed: #{error.class}: #{error.message}"
  end
  # Exit non-zero so callers can detect a partial run.
  abort "#{failures.size} of #{COMBOS.size} batches failed. Partial results in #{RESULTS_DIR}/"
end
|
||||
Loading…
Reference in a new issue