Diffstat (limited to 'runpod-session.sh')
 -rwxr-xr-x  runpod-session.sh | 608
 1 file changed, 608 insertions, 0 deletions
diff --git a/runpod-session.sh b/runpod-session.sh
new file mode 100755
index 0000000..c81e8dd
--- /dev/null
+++ b/runpod-session.sh
@@ -0,0 +1,608 @@
+#!/usr/bin/env bash
+# runpod-session.sh — Manage RunPod Ollama sessions for opencode
+#
+# Usage:
+# runpod-session.sh [OPTIONS]
+#
+# Options:
+# --model MODEL Ollama model tag to warm up (e.g. qwen3-coder:latest)
+# Defaults to DEFAULT_MODEL in config
+# --all-models Warm up ALL models listed in WARMUP_MODELS config
+# --gpu-type 'NAME' Preferred GPU display name (partial match, case-insensitive)
+# --max-price PRICE Max $/hr ceiling (default: MAX_PRICE_PER_HR in config)
+# --new Force creation of a new pod (skip restart logic)
+# --stop Stop the current running pod
+# --status Show current session state and reachability
+# --help Show this help
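+#
+# Examples:
+#   runpod-session.sh --model qwen3-coder:latest
+#   runpod-session.sh --all-models --new
+#   runpod-session.sh --status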
+#
+# Requires: curl, jq
+# Config: ~/.config/runpod-session/config (auto-created on first run)
+
+set -euo pipefail
+
+# ─── Paths ────────────────────────────────────────────────────────────────────
+OPENCODE_CONFIG="$HOME/.config/opencode/opencode.jsonc"
+SESSION_CONFIG_DIR="$HOME/.config/runpod-session"
+SESSION_CONFIG="$SESSION_CONFIG_DIR/config"
+SESSION_STATE="$SESSION_CONFIG_DIR/state.json"
+RUNPOD_API="https://api.runpod.io/graphql"
+
+# ─── Defaults (overridden by config file) ─────────────────────────────────────
+OLLAMA_IMAGE="ollama/ollama:latest"
+NETWORK_VOLUME_NAME="danixland-storage"
+OPENCODE_PROVIDER="runpod"
+DEFAULT_MODEL="qwen3-coder:latest"
+WARMUP_MODELS="qwen3-coder:latest translategemma:27b"
+MAX_PRICE_PER_HR=2.50
+CONTAINER_DISK_GB=15
+DEFAULT_GPU_TYPE=""
+GPU_COUNT=1
+POLL_INTERVAL=5
+STARTUP_TIMEOUT=240
+WARMUP_NUM_CTX=32768
+
+# ─── Colors ───────────────────────────────────────────────────────────────────
+RED='\033[0;31m'; YELLOW='\033[1;33m'; GREEN='\033[0;32m'
+CYAN='\033[0;36m'; BOLD='\033[1m'; RESET='\033[0m'
+
+log() { echo -e "${CYAN}[runpod]${RESET} $*" >&2; }
+ok() { echo -e "${GREEN}[ok]${RESET} $*" >&2; }
+warn() { echo -e "${YELLOW}[warn]${RESET} $*" >&2; }
+die() { echo -e "${RED}[error]${RESET} $*" >&2; exit 1; }
+
+# ─── Dependency check ─────────────────────────────────────────────────────────
+for cmd in curl jq; do
+ command -v "$cmd" &>/dev/null || die "Required command not found: $cmd"
+done
+
+# ─── Config bootstrap ─────────────────────────────────────────────────────────
+mkdir -p "$SESSION_CONFIG_DIR"
+
+if [[ ! -f "$SESSION_CONFIG" ]]; then
+ warn "No config found — creating $SESSION_CONFIG"
+ cat > "$SESSION_CONFIG" <<'CONF'
+# runpod-session configuration — edit then re-run.
+
+RUNPOD_API_KEY=""
+
+# Network volume name as shown in RunPod dashboard
+NETWORK_VOLUME_NAME="danixland-storage"
+
+# Must match the key in your opencode.json "provider" block
+OPENCODE_PROVIDER="runpod"
+
+# Model to activate by default (used when --model is not passed)
+DEFAULT_MODEL="qwen3-coder:latest"
+
+# All models that live on this pod — space-separated Ollama tags.
+# These get registered in opencode.json and are warmed up with --all-models.
+WARMUP_MODELS="qwen3-coder:latest translategemma:27b"
+
+# GPU selection
+DEFAULT_GPU_TYPE="" # e.g. "RTX PRO 6000" — empty = cheapest available
+MAX_PRICE_PER_HR=2.50 # hard $/hr ceiling
+
+# Pod configuration
+CONTAINER_DISK_GB=15
+GPU_COUNT=1
+STARTUP_TIMEOUT=240 # seconds before giving up waiting for Ollama
+WARMUP_NUM_CTX=32768   # num_ctx used when warming up models into VRAM
+
+# Optional: models directory on the network volume (e.g. /workspace/ollama).
+# When set it is passed to the pod as OLLAMA_MODELS; leave empty to keep the
+# image default on the container disk.
+OLLAMA_MODELS_PATH=""
+CONF
+ echo ""
+ echo -e "${YELLOW}Edit ${BOLD}$SESSION_CONFIG${RESET}${YELLOW}, set RUNPOD_API_KEY, then re-run.${RESET}"
+ exit 0
+fi
+
+# shellcheck source=/dev/null
+source "$SESSION_CONFIG"
+[[ -z "${RUNPOD_API_KEY:-}" ]] && die "RUNPOD_API_KEY not set in $SESSION_CONFIG"
+
+# ─── Argument parsing ─────────────────────────────────────────────────────────
+OPT_MODEL="${DEFAULT_MODEL}"
+OPT_ALL_MODELS=0
+OPT_GPU_TYPE="${DEFAULT_GPU_TYPE:-}"
+OPT_MAX_PRICE="${MAX_PRICE_PER_HR}"
+OPT_FORCE_NEW=0
+OPT_STOP=0
+OPT_STATUS=0
+
+while [[ $# -gt 0 ]]; do
+  case "$1" in
+    --model)      OPT_MODEL="$2";     shift 2 ;;
+    --all-models) OPT_ALL_MODELS=1;   shift ;;
+    --gpu-type)   OPT_GPU_TYPE="$2";  shift 2 ;;
+    --max-price)  OPT_MAX_PRICE="$2"; shift 2 ;;
+    --new)        OPT_FORCE_NEW=1;    shift ;;
+    --stop)       OPT_STOP=1;         shift ;;
+    --status)     OPT_STATUS=1;       shift ;;
+    --help|-h)    sed -n '2,22p' "$0"; exit 0 ;;  # usage header above, incl. Examples
+    *)            die "Unknown option: $1 (use --help)" ;;
+  esac
+done
+
+# ─── RunPod GraphQL helper ────────────────────────────────────────────────────
+gql() {
+  local err response
+  # Capture curl failure explicitly: under `set -e`, a failing command
+  # substitution in a plain assignment aborts the script before any $? check
+  # could ever run.
+  response=$(curl --ipv4 -s \
+    -H "Content-Type: application/json" \
+    -H "Authorization: Bearer $RUNPOD_API_KEY" \
+    -d "$1" \
+    "$RUNPOD_API") || response=""
+  [[ -z "$response" ]] && die "RunPod API request failed — check API key and connectivity"
+  err=$(echo "$response" | jq -r '.errors[0].message // empty' 2>/dev/null || true)
+  [[ -n "$err" ]] && die "RunPod API error: $err"
+  echo "$response"
+}
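+# Usage example (same query shape as get_pods below):
+#   gql '{"query":"{ myself { pods { id } } }"}' | jq -r '.data.myself.pods[].id'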
+
+# ─── Pod queries ──────────────────────────────────────────────────────────────
+get_pods() {
+ gql '{"query":"{ myself { pods { id name desiredStatus costPerHr runtime { uptimeInSeconds } machine { gpuDisplayName } } } }"}'
+}
+
+find_ollama_pod() {
+ # Returns compact JSON of the first pod matching "ollama" in its name, or empty string
+ local result
+ result=$(echo "$1" | jq -c \
+ '.data.myself.pods[] | select(.name | test("ollama"; "i"))' 2>/dev/null | head -1)
+ echo "$result"
+}
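+# find_ollama_pod emits one compact object, e.g. (values illustrative):
+#   {"id":"abc123","name":"ollama-session","desiredStatus":"RUNNING","costPerHr":0.69,...}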
+
+# ─── --status subcommand ──────────────────────────────────────────────────────
+cmd_status() {
+ echo ""
+ echo -e "${BOLD}runpod-session status${RESET}"
+ echo ""
+
+ # ── Live pod data from API ────────────────────────────────────────────────
+ local pods_json pod_json
+ pods_json=$(get_pods)
+ pod_json=$(find_ollama_pod "$pods_json")
+
+ if [[ -n "$pod_json" ]]; then
+    local pod_id status gpu cost_hr uptime_sec cost_so_far uptime_human
+ pod_id=$(echo "$pod_json" | jq -r '.id')
+ status=$(echo "$pod_json" | jq -r '.desiredStatus')
+ gpu=$(echo "$pod_json" | jq -r '.machine.gpuDisplayName // "?"')
+ cost_hr=$(echo "$pod_json" | jq -r '.costPerHr // 0')
+ uptime_sec=$(echo "$pod_json" | jq -r '.runtime.uptimeInSeconds // 0')
+
+ # Calculate cost: (uptime_seconds / 3600) * cost_per_hr
+ cost_so_far=$(echo "$uptime_sec $cost_hr" | awk '{printf "%.4f", ($1/3600)*$2}')
+ uptime_human=$(echo "$uptime_sec" | awk '{
+ h=int($1/3600); m=int(($1%3600)/60); s=$1%60
+ if (h>0) printf "%dh %dm %ds", h, m, s
+ else if (m>0) printf "%dm %ds", m, s
+ else printf "%ds", s
+ }')
+
+ echo -e " Pod: ${BOLD}${pod_id}${RESET}"
+ echo -e " GPU: ${gpu}"
+ echo -e " Status: ${status}"
+ echo -e " Rate: \$${cost_hr}/hr"
+ if [[ "$status" == "RUNNING" ]]; then
+ echo -e " Uptime: ${uptime_human}"
+ echo -e " Cost: \$${cost_so_far} this session"
+ else
+ echo -e " Uptime: —"
+ echo -e " Cost: —"
+ fi
+ echo ""
+
+ # ── Ollama reachability ───────────────────────────────────────────────
+ local base="https://${pod_id}-11434.proxy.runpod.net"
+ local tags_response
+ tags_response=$(curl -s --ipv4 --max-time 5 "${base}/api/tags" 2>/dev/null || true)
+ if echo "$tags_response" | jq -e '.models' > /dev/null 2>&1; then
+ echo -e " Ollama: ${GREEN}reachable${RESET}"
+ local models
+ models=$(echo "$tags_response" | jq -r '.models[].name' 2>/dev/null || true)
+ if [[ -n "$models" ]]; then
+ echo -e " In VRAM: $(echo "$models" | tr '\n' ' ')"
+ else
+ echo -e " In VRAM: none (model will load on first request)"
+ fi
+ else
+ echo -e " Ollama: ${YELLOW}not reachable${RESET}"
+ fi
+ else
+ echo -e " ${YELLOW}No Ollama pod found in your account.${RESET}"
+ fi
+
+ # ── Saved session state ───────────────────────────────────────────────────
+ if [[ -f "$SESSION_STATE" ]]; then
+ echo ""
+ echo -e " ${BOLD}Last session record:${RESET}"
+ jq -r '" Started: \(.started_at)\n Model: \(.model)\n URL: \(.ollama_url)"' \
+ "$SESSION_STATE"
+ fi
+
+ echo ""
+}
+
+# ─── Pod lifecycle ────────────────────────────────────────────────────────────
+restart_pod() {
+ log "Restarting pod $1 ..."
+ gql "{\"query\":\"mutation { podResume(input: { podId: \\\"$1\\\", gpuCount: $GPU_COUNT }) { id desiredStatus } }\"}" > /dev/null
+}
+
+stop_pod() {
+ log "Stopping pod $1 ..."
+ gql "{\"query\":\"mutation { podStop(input: { podId: \\\"$1\\\" }) { id desiredStatus } }\"}" > /dev/null
+ ok "Pod stopped. Storage costs continue while stopped (~\$0.002/hr)."
+}
+
+terminate_pod() {
+ log "Terminating pod $1 ..."
+ gql "{\"query\":\"mutation { podTerminate(input: { podId: \\\"$1\\\" }) }\"}" > /dev/null
+ ok "Pod terminated."
+}
+
+# ─── --stop subcommand ────────────────────────────────────────────────────────
+cmd_stop() {
+ local pod_id=""
+ if [[ -f "$SESSION_STATE" ]]; then
+ pod_id=$(jq -r '.pod_id' "$SESSION_STATE")
+ else
+ local pods_json pod_json
+ pods_json=$(get_pods)
+ pod_json=$(find_ollama_pod "$pods_json")
+ [[ -z "$pod_json" ]] && die "No Ollama pod found in your account."
+ pod_id=$(echo "$pod_json" | jq -r '.id')
+ fi
+ echo -n " Stop pod $pod_id? [y/N] "
+ read -r confirm
+ [[ "${confirm,,}" != "y" ]] && { log "Aborted."; exit 0; }
+ stop_pod "$pod_id"
+ rm -f "$SESSION_STATE"
+}
+
+# ─── GPU selection ────────────────────────────────────────────────────────────
+# Returns a JSON array of candidates sorted by price (preferred first if set).
+# Caller iterates and tries each until one succeeds.
+get_gpu_candidates() {
+ local preferred="$1" max_price="$2"
+ log "Querying available GPUs (secure cloud, max \$$max_price/hr) ..." >&2
+
+ local result candidates
+ result=$(curl -s --ipv4 \
+ -H "Content-Type: application/json" \
+ -H "Authorization: Bearer $RUNPOD_API_KEY" \
+ -d '{"query":"{ gpuTypes { id displayName memoryInGb secureCloud lowestPrice(input: { gpuCount: 1 }) { uninterruptablePrice } } }"}' \
+ "$RUNPOD_API")
+
+ candidates=$(echo "$result" | jq -c \
+ --arg max "$max_price" \
+ '[.data.gpuTypes[]
+ | select(.secureCloud == true)
+ | select((.lowestPrice.uninterruptablePrice // 0) > 0)
+ | select(.lowestPrice.uninterruptablePrice <= ($max | tonumber))
+ | { id: .id, name: .displayName, vram: .memoryInGb,
+ price: .lowestPrice.uninterruptablePrice }
+ ] | sort_by(.price)')
+
+ [[ -z "$candidates" || "$candidates" == "[]" ]] && \
+ die "No GPUs on secure cloud within \$$max_price/hr. Try --max-price."
+
+ # Bubble preferred GPU to front so it's tried first
+ if [[ -n "$preferred" ]]; then
+ local reordered
+ reordered=$(echo "$candidates" | jq -c \
+ --arg p "$preferred" \
+ '( [.[] | select(.name | test($p; "i"))] ) +
+ ( [.[] | select(.name | test($p; "i") | not)] )')
+ local pcount
+ pcount=$(echo "$reordered" | jq 'map(select(.name | test($p; "i"))) | length' --arg p "$preferred" 2>/dev/null || echo 0)
+ if [[ "$pcount" -eq 0 ]]; then
+ warn "Preferred GPU '$preferred' not in catalog within price limit. Will try all." >&2
+ fi
+ candidates="$reordered"
+ fi
+
+ echo "$candidates"
+}
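+# get_gpu_candidates emits a price-sorted array, e.g. (price illustrative):
+#   [{"id":"NVIDIA GeForce RTX 4090","name":"RTX 4090","vram":24,"price":0.69}, ...]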
+
+# ─── Network volume ───────────────────────────────────────────────────────────
+get_network_volume_id() {
+ local result vol_id
+ result=$(curl -s --ipv4 \
+ -H "Content-Type: application/json" \
+ -H "Authorization: Bearer $RUNPOD_API_KEY" \
+ -d '{"query":"{ myself { networkVolumes { id name } } }"}' \
+ "$RUNPOD_API")
+ vol_id=$(echo "$result" | jq -r \
+ --arg n "$NETWORK_VOLUME_NAME" \
+ '.data.myself.networkVolumes[] | select(.name == $n) | .id')
+ [[ -z "$vol_id" ]] && die "Network volume '$NETWORK_VOLUME_NAME' not found."
+ echo "$vol_id"
+}
+
+# ─── Create pod ───────────────────────────────────────────────────────────────
+create_pod() {
+ local gpu_id="$1" vol_id="$2"
+ log "Creating pod ..."
+
+ # Build env array — omit OLLAMA_MODELS entry if path is unset
+ local env_json
+ env_json='[{"key":"OLLAMA_HOST","value":"0.0.0.0"},{"key":"OLLAMA_LOAD_TIMEOUT","value":"10m"},{"key":"OLLAMA_KEEP_ALIVE","value":"1h"}]'
+ if [[ -n "${OLLAMA_MODELS_PATH:-}" ]]; then
+ env_json=$(echo "$env_json" | jq \
+ --arg v "$OLLAMA_MODELS_PATH" \
+ '. + [{"key":"OLLAMA_MODELS","value":$v}]')
+ fi
+
+ local payload
+ payload=$(jq -n \
+ --arg gpu_id "$gpu_id" \
+ --arg vol_id "$vol_id" \
+ --arg image "$OLLAMA_IMAGE" \
+ --argjson gpu_count "$GPU_COUNT" \
+ --argjson disk "$CONTAINER_DISK_GB" \
+ --argjson env "$env_json" \
+ '{query: "mutation($input: PodFindAndDeployOnDemandInput!) { podFindAndDeployOnDemand(input: $input) { id } }",
+ variables: { input: {
+ cloudType: "SECURE",
+ gpuCount: $gpu_count,
+ volumeInGb: 0,
+ containerDiskInGb: $disk,
+ minVcpuCount: 4,
+ minMemoryInGb: 15,
+ gpuTypeId: $gpu_id,
+ name: "ollama-session",
+ imageName: $image,
+ ports: "11434/http",
+ volumeMountPath: "/workspace",
+ networkVolumeId: $vol_id,
+ env: $env
+ }}}')
+
+ local result pod_id err_code
+ result=$(curl -s --ipv4 \
+ -H "Content-Type: application/json" \
+ -H "Authorization: Bearer $RUNPOD_API_KEY" \
+ -d "$payload" \
+ "$RUNPOD_API")
+
+ pod_id=$(echo "$result" | jq -r '.data.podFindAndDeployOnDemand.id // empty')
+ if [[ -n "$pod_id" ]]; then
+ echo "$pod_id"
+ return 0
+ fi
+
+ err_code=$(echo "$result" | jq -r '.errors[0].extensions.code // empty')
+ if [[ "$err_code" == "SUPPLY_CONSTRAINT" ]]; then
+ warn "No supply for GPU $gpu_id — trying next candidate ..." >&2
+ return 1
+ fi
+ die "Pod creation failed. Response: $result"
+}
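+# RunPod reports "no machines available for this GPU type" via the error
+# extensions code SUPPLY_CONSTRAINT (return 1: try the next candidate); any
+# other error is fatal (return 2: the caller aborts).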
+
+# ─── Wait for Ollama ──────────────────────────────────────────────────────────
+wait_for_pod() {
+ local pod_id="$1"
+ local url="https://${pod_id}-11434.proxy.runpod.net"
+ local elapsed=0
+ log "Polling $url/api/tags (timeout: ${STARTUP_TIMEOUT}s) ..."
+ while (( elapsed < STARTUP_TIMEOUT )); do
+ local response
+ response=$(curl -s --ipv4 --max-time 5 "${url}/api/tags" 2>/dev/null || true)
+ if echo "$response" | jq -e '.models' > /dev/null 2>&1; then
+ echo ""; ok "Ollama is up."; return 0
+ fi
+ printf " [%3ds] waiting...\r" "$elapsed"
+ sleep "$POLL_INTERVAL"
+ (( elapsed += POLL_INTERVAL ))
+ done
+ echo ""
+ die "Timed out after ${STARTUP_TIMEOUT}s. Check the RunPod dashboard."
+}
+
+# ─── Patch opencode.json ──────────────────────────────────────────────────────
+#
+# What this changes in your opencode.json:
+# .provider.runpod.options.baseURL → https://<pod-id>-11434.proxy.runpod.net/v1
+# .model → runpod/qwen3-coder:latest (DEFAULT_MODEL)
+# .provider.runpod.models → merges all WARMUP_MODELS in (preserving
+# any existing per-model config you have)
+#
+# Uses jq's `*` (recursive merge) so your existing model overrides are never clobbered.
+# A .bak backup is written before any changes.
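+#
+# Illustrative resulting fragment (the pod id "abc123" is made up):
+#
+#   "model": "runpod/qwen3-coder:latest",
+#   "provider": {
+#     "runpod": {
+#       "options": { "baseURL": "https://abc123-11434.proxy.runpod.net/v1" },
+#       "models": {
+#         "qwen3-coder:latest": { "tools": true },
+#         "translategemma:27b": { "tools": true }
+#       }
+#     }
+#   }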
+#
+patch_opencode_config() {
+ local new_url="$1" # full URL including /v1
+
+ [[ ! -f "$OPENCODE_CONFIG" ]] && die "opencode config not found at $OPENCODE_CONFIG"
+ cp "$OPENCODE_CONFIG" "${OPENCODE_CONFIG}.bak"
+
+ # Build a jq object for all warmup models: { "model:tag": {"tools":true}, ... }
+ # tools:true is a safe default — it won't override existing per-model settings
+ # because we merge with * where existing config wins on conflicts.
+ local models_patch="{}"
+ for m in $WARMUP_MODELS; do
+ models_patch=$(printf '%s' "$models_patch" \
+ | jq --arg m "$m" '. + {($m): {"tools": true}}')
+ done
+
+ local tmp
+ tmp=$(mktemp)
+ jq \
+ --arg provider "$OPENCODE_PROVIDER" \
+ --arg url "$new_url" \
+ --arg model "${OPENCODE_PROVIDER}/${DEFAULT_MODEL}" \
+ --argjson patch "$models_patch" \
+ '
+ .provider[$provider].options.baseURL = $url
+ | .model = $model
+ | .provider[$provider].models = (
+ $patch * (.provider[$provider].models // {})
+ )
+ ' "$OPENCODE_CONFIG" > "$tmp" && mv "$tmp" "$OPENCODE_CONFIG"
+
+ ok "opencode.json patched:"
+ log " provider.${OPENCODE_PROVIDER}.options.baseURL = $new_url"
+ log " model = ${OPENCODE_PROVIDER}/${DEFAULT_MODEL}"
+}
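+# To roll back a patch:
+#   mv ~/.config/opencode/opencode.jsonc.bak ~/.config/opencode/opencode.jsonc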
+
+# ─── Warm up one model ────────────────────────────────────────────────────────
+warmup_model() {
+ local pod_id="$1" model="$2"
+ local base="https://${pod_id}-11434.proxy.runpod.net"
+
+ log "Warming up '$model' into VRAM ..."
+
+ if curl -s --ipv4 --max-time 300 -X POST "${base}/api/generate" \
+ -H "Content-Type: application/json" \
+ -d "{\"model\": \"$model\", \"prompt\": \"hi\", \"stream\": false, \"options\": {\"num_ctx\": ${WARMUP_NUM_CTX:-32768}}}" \
+ > /dev/null 2>&1; then
+ ok " '$model' is loaded."
+ else
+ warn " Warmup for '$model' failed — model will load on first use."
+ fi
+}
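+# To check afterwards what is actually resident (pod id is a placeholder):
+#   curl -s https://<pod-id>-11434.proxy.runpod.net/api/ps | jq -r '.models[].name'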
+
+# ─── State persistence ────────────────────────────────────────────────────────
+save_state() {
+ jq -n \
+ --arg pod_id "$1" \
+ --arg url "$2" \
+ --arg model "$3" \
+ --arg ts "$(date -u +%Y-%m-%dT%H:%M:%SZ)" \
+ '{ pod_id: $pod_id, ollama_url: $url, model: $model, started_at: $ts }' \
+ > "$SESSION_STATE"
+}
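+# state.json then looks like (values illustrative):
+#   { "pod_id": "abc123", "ollama_url": "https://abc123-11434.proxy.runpod.net/v1",
+#     "model": "qwen3-coder:latest", "started_at": "2025-01-01T00:00:00Z" }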
+
+# ─── Main ─────────────────────────────────────────────────────────────────────
+main() {
+ [[ $OPT_STATUS -eq 1 ]] && { cmd_status; exit 0; }
+ [[ $OPT_STOP -eq 1 ]] && { cmd_stop; exit 0; }
+
+ echo -e "${BOLD}runpod-session${RESET} — Ollama on RunPod → opencode"
+ echo ""
+
+ local pod_id=""
+ local _skip_wait=0
+
+ # ── 1. Existing pod check ─────────────────────────────────────────────────
+ if [[ $OPT_FORCE_NEW -eq 0 ]]; then
+ log "Checking for existing Ollama pods ..."
+ local pods_json pod_json
+ pods_json=$(get_pods)
+ pod_json=$(find_ollama_pod "$pods_json")
+
+ if [[ -n "$pod_json" ]]; then
+ pod_id=$(echo "$pod_json" | jq -r '.id')
+ local status gpu cost
+ status=$(echo "$pod_json" | jq -r '.desiredStatus')
+ gpu=$(echo "$pod_json" | jq -r '.machine.gpuDisplayName // "?"')
+ cost=$(echo "$pod_json" | jq -r '.costPerHr // "?"')
+
+ echo -e " Found: ${BOLD}${pod_id}${RESET} GPU: ${gpu} \$${cost}/hr Status: ${status}"
+ echo ""
+
+ case "$status" in
+ RUNNING)
+ local _check
+ _check=$(curl -s --ipv4 --max-time 5 "https://${pod_id}-11434.proxy.runpod.net/api/tags" 2>/dev/null || true)
+ if echo "$_check" | jq -e '.models' > /dev/null 2>&1; then
+ ok "Already running and reachable — skipping startup sequence."
+ _skip_wait=1
+ else
+ log "Pod is running but Ollama not yet reachable — waiting ..."
+ fi
+ ;;
+ EXITED|STOPPED)
+ echo -n " [R]estart [D]elete and create new [A]bort [R/d/a]: "
+ read -r choice
+ case "${choice,,}" in
+ d) terminate_pod "$pod_id"; pod_id="" ;;
+ a) log "Aborted."; exit 0 ;;
+ *) restart_pod "$pod_id" ;;
+ esac
+ ;;
+ *)
+ warn "Unexpected pod state '$status' — ignoring this pod."
+ pod_id=""
+ ;;
+ esac
+ else
+ log "No existing Ollama pod found."
+ fi
+ fi
+
+ # ── 2. Create new pod if needed ───────────────────────────────────────────
+ if [[ -z "$pod_id" ]]; then
+ local vol_id candidates gpu_json gpu_id gpu_name gpu_vram gpu_price
+ vol_id=$(get_network_volume_id)
+ ok "Volume: ${NETWORK_VOLUME_NAME} ($vol_id)"
+
+ candidates=$(get_gpu_candidates "$OPT_GPU_TYPE" "$OPT_MAX_PRICE")
+ local count
+ count=$(echo "$candidates" | jq 'length')
+ [[ "$count" -eq 0 ]] && die "No GPU candidates found."
+
+ log "${count} GPU options within budget. Will prompt for each."
+
+ local i=0
+ while [[ $i -lt $count ]]; do
+ gpu_json=$(echo "$candidates" | jq -c --argjson i "$i" '.[$i]')
+ gpu_id=$(echo "$gpu_json" | jq -r '.id')
+ gpu_name=$(echo "$gpu_json" | jq -r '.name')
+ gpu_vram=$(echo "$gpu_json" | jq -r '.vram')
+ gpu_price=$(echo "$gpu_json" | jq -r '.price')
+
+ echo ""
+ echo -e " ${BOLD}${gpu_name}${RESET} ${gpu_vram}GB VRAM \$${gpu_price}/hr"
+ echo -n " Create pod with this GPU? [Y/n/a(bort)] "
+ read -r choice
+ case "${choice,,}" in
+ a) log "Aborted."; exit 0 ;;
+ n) (( i++ )) || true; continue ;;
+ esac
+
+      local rc=0
+      pod_id=$(create_pod "$gpu_id" "$vol_id") || rc=$?
+      if [[ $rc -eq 0 && -n "$pod_id" ]]; then
+        ok "Pod created: $pod_id   GPU: ${gpu_name}"
+        break
+      fi
+      [[ $rc -eq 2 ]] && exit 1  # fatal API error, already reported
+      (( i++ )) || true
+ done
+
+ [[ -z "$pod_id" ]] && die "All ${count} GPU candidates exhausted. Try --max-price or later."
+ fi
+
+ # ── 3. Wait for Ollama if needed ─────────────────────────────────────────
+ if [[ "${_skip_wait:-0}" != "1" ]]; then
+ wait_for_pod "$pod_id"
+ fi
+
+ # ── 4. Final URL ──────────────────────────────────────────────────────────
+ local ollama_url="https://${pod_id}-11434.proxy.runpod.net/v1"
+
+ # ── 5. Patch opencode.json ────────────────────────────────────────────────
+ patch_opencode_config "$ollama_url"
+
+ # ── 6. Warmup ─────────────────────────────────────────────────────────────
+ if [[ $OPT_ALL_MODELS -eq 1 ]]; then
+ for m in $WARMUP_MODELS; do
+ warmup_model "$pod_id" "$m"
+ done
+ elif [[ -n "$OPT_MODEL" ]]; then
+ warmup_model "$pod_id" "$OPT_MODEL"
+ else
+ warn "No warmup requested. Use --model MODEL or --all-models."
+ fi
+
+ # ── 7. Save state ─────────────────────────────────────────────────────────
+ save_state "$pod_id" "$ollama_url" "$OPT_MODEL"
+
+ # ── 8. Done ───────────────────────────────────────────────────────────────
+ echo ""
+ echo -e "${BOLD}${GREEN}Ready.${RESET}"
+ printf " Pod: %s\n" "$pod_id"
+ printf " URL: %s\n" "$ollama_url"
+ printf " Run: opencode\n"
+ echo ""
+}
+
+main "$@"