#!/usr/bin/env bash
# runpod-session.sh — Manage RunPod Ollama sessions for opencode
#
# Usage:
#   runpod-session.sh [OPTIONS]
#
# Options:
#   --model MODEL        Ollama model tag to warm up (e.g. qwen3-coder:latest)
#                        Defaults to DEFAULT_MODEL in config
#   --all-models         Warm up ALL models listed in WARMUP_MODELS config
#   --gpu-type 'NAME'    Preferred GPU display name (partial match, case-insensitive)
#   --max-price PRICE    Max $/hr ceiling (default: MAX_PRICE_PER_HR in config)
#   --new                Force creation of a new pod (skip restart logic)
#   --stop               Stop the current running pod
#   --status             Show current session state and reachability
#   --help               Show this help
#
# Requires: curl, jq
# Config:   ~/.config/runpod-session/config  (auto-created on first run)

# Strict mode: stop on unhandled command failures, on use of unset variables,
# and when any stage of a pipeline fails.
set -o errexit -o nounset -o pipefail

# ─── Paths ────────────────────────────────────────────────────────────────────
# opencode configuration file that is patched with the live pod URL.
OPENCODE_CONFIG="${HOME}/.config/opencode/opencode.jsonc"
# This tool's own directory: user config plus the last-session state record.
SESSION_CONFIG_DIR="${HOME}/.config/runpod-session"
SESSION_CONFIG="${SESSION_CONFIG_DIR}/config"
SESSION_STATE="${SESSION_CONFIG_DIR}/state.json"
# GraphQL endpoint that every RunPod API request is sent to.
RUNPOD_API="https://api.runpod.io/graphql"

# ─── Defaults (overridden by config file) ─────────────────────────────────────
# Container image and pod sizing used when a new pod is created.
OLLAMA_IMAGE="ollama/ollama:latest"
CONTAINER_DISK_GB=15
GPU_COUNT=1
NETWORK_VOLUME_NAME="my-storage"

# GPU selection: preferred display name (empty = no preference) and $/hr ceiling.
DEFAULT_GPU_TYPE=""
MAX_PRICE_PER_HR=2.50

# opencode provider key and the models registered / warmed up.
OPENCODE_PROVIDER="runpod"
DEFAULT_MODEL="qwen3-coder:latest"
WARMUP_MODELS="qwen3-coder:latest translategemma:27b"
WARMUP_NUM_CTX=32768

# Readiness polling: seconds between probes and overall budget.
POLL_INTERVAL=5
STARTUP_TIMEOUT=240

# Optional external files to patch with the pod URL (empty = skip).
TRANSART_SCRIPT=""
PUBLISHER_CONFIG=""

# ─── Colors ───────────────────────────────────────────────────────────────────
# Stored as literal backslash sequences; they are expanded by `echo -e`.
RED='\033[0;31m'
YELLOW='\033[1;33m'
GREEN='\033[0;32m'
CYAN='\033[0;36m'
BOLD='\033[1m'
RESET='\033[0m'

# Shared formatter for the message helpers below.
#   $1 - colour escape sequence, $2 - tag shown in brackets, rest - message words
# Writes one line to stderr; `echo -e` expands the colour escapes.
_emit() {
    local color="$1" label="$2"
    shift 2
    echo -e "${color}[${label}]${RESET} $*" >&2
}

# Progress message.
log() {
    _emit "$CYAN" "runpod" "$@"
}

# Success message.
ok() {
    _emit "$GREEN" "ok" "$@"
}

# Non-fatal warning.
warn() {
    _emit "$YELLOW" "warn" "$@"
}

# Fatal error: print the message, then exit the current (sub)shell with status 1.
die() {
    _emit "$RED" "error" "$@"
    exit 1
}

# ─── Dependency check ─────────────────────────────────────────────────────────
# Fail early if an external tool the script relies on is not on PATH.
required_cmds=(curl jq)
for cmd in "${required_cmds[@]}"; do
    if ! command -v "$cmd" > /dev/null 2>&1; then
        die "Required command not found: $cmd"
    fi
done

# ─── Config bootstrap ─────────────────────────────────────────────────────────
# Make sure the config directory exists. On the very first run, write a template
# config, tell the user to fill in the API key, and stop (exit 0). On later runs,
# source the config so its values override the built-in defaults.
mkdir -p "$SESSION_CONFIG_DIR"

if [[ ! -f "$SESSION_CONFIG" ]]; then
    warn "No config found — creating $SESSION_CONFIG"
    cat > "$SESSION_CONFIG" <<'CONF'
# runpod-session configuration — edit then re-run.

RUNPOD_API_KEY=""

# Network volume name as shown in RunPod dashboard
NETWORK_VOLUME_NAME="my-storage"

# Must match the key in your opencode.json "provider" block
OPENCODE_PROVIDER="runpod"

# Model to activate by default (used when --model is not passed)
DEFAULT_MODEL="qwen3-coder:latest"

# All models that live on this pod — space-separated Ollama tags.
# These get registered in opencode.json and are warmed up with --all-models.
WARMUP_MODELS="qwen3-coder:latest translategemma:27b"

# GPU selection
DEFAULT_GPU_TYPE=""        # e.g. "RTX PRO 6000" — empty = cheapest available
MAX_PRICE_PER_HR=2.50      # hard $/hr ceiling

# Pod configuration
CONTAINER_DISK_GB=15
GPU_COUNT=1
STARTUP_TIMEOUT=240        # seconds before giving up waiting for Ollama
WARMUP_NUM_CTX=32768       # num_ctx used when warming up models into VRAM

# External tool configs to patch with the live pod URL (leave empty to skip)
TRANSART_SCRIPT=""         # e.g. /home/user/bin/transart.py
PUBLISHER_CONFIG=""        # e.g. /home/user/.config/my-publisher/config.toml
CONF
    # This file is about to hold RUNPOD_API_KEY. With a default umask it would be
    # readable by other local users, so restrict it to the owner. Best-effort:
    # a chmod failure is reported but does not abort the first-run flow.
    chmod 600 "$SESSION_CONFIG" \
        || warn "Could not restrict permissions on $SESSION_CONFIG — it will contain your API key"
    echo ""
    echo -e "${YELLOW}Edit ${BOLD}$SESSION_CONFIG${RESET}${YELLOW}, set RUNPOD_API_KEY, then re-run.${RESET}"
    exit 0
fi

# The config is plain shell; sourcing it overrides the defaults defined earlier.
# shellcheck source=/dev/null
source "$SESSION_CONFIG"
if [[ -z "${RUNPOD_API_KEY:-}" ]]; then
    die "RUNPOD_API_KEY not set in $SESSION_CONFIG"
fi

# ─── Argument parsing ─────────────────────────────────────────────────────────
# Option defaults, seeded from the config values that are already in scope.
OPT_MODEL="${DEFAULT_MODEL}"
OPT_ALL_MODELS=0
OPT_GPU_TYPE="${DEFAULT_GPU_TYPE:-}"
OPT_MAX_PRICE="${MAX_PRICE_PER_HR}"
OPT_FORCE_NEW=0
OPT_STOP=0
OPT_STATUS=0

# Abort with a readable message when a value-taking option is the last word on
# the command line. Without this, "$2" trips `set -u` ("unbound variable").
#   $1 - option name, $2 - number of arguments left (including the option)
_require_value() {
    if (( $2 < 2 )); then
        die "Option $1 requires a value  (use --help)"
    fi
}

# Parse command-line options into the OPT_* globals above.
#   Arguments: the script's command-line words
#   --help prints lines 2-17 of this script (the usage header) and exits 0.
#   Unknown options and missing option values are fatal.
parse_args() {
    while [[ $# -gt 0 ]]; do
        case "$1" in
            --model)       _require_value "$1" "$#"; OPT_MODEL="$2";     shift 2 ;;
            --all-models)  OPT_ALL_MODELS=1;                             shift   ;;
            --gpu-type)    _require_value "$1" "$#"; OPT_GPU_TYPE="$2";  shift 2 ;;
            --max-price)   _require_value "$1" "$#"; OPT_MAX_PRICE="$2"; shift 2 ;;
            --new)         OPT_FORCE_NEW=1;                              shift   ;;
            --stop)        OPT_STOP=1;                                   shift   ;;
            --status)      OPT_STATUS=1;                                 shift   ;;
            --help|-h)     sed -n '2,17p' "$0"; exit 0 ;;
            *) die "Unknown option: $1  (use --help)" ;;
        esac
    done
}

parse_args "$@"

# ─── RunPod GraphQL helper ────────────────────────────────────────────────────
# Send one GraphQL request to the RunPod API.
#   Arguments: $1 - complete JSON request body
#   Globals:   RUNPOD_API (endpoint), RUNPOD_API_KEY (bearer token)
#   Outputs:   the raw JSON response on stdout
#   Exits:     via die on a transport failure, an empty response, or when the
#              response carries a GraphQL `errors` entry
gql() {
    local response err

    # curl's status must be tested in the same statement as the assignment:
    # with `set -e` active, a failing bare assignment ends the script before a
    # later `$?` check could run, and no error message would be printed.
    response=$(curl --ipv4 -s \
        -H "Content-Type: application/json" \
        -H "Authorization: Bearer $RUNPOD_API_KEY" \
        -d "$1" \
        "$RUNPOD_API") \
        || die "RunPod API request failed — check API key and connectivity"

    if [[ -z "$response" ]]; then
        die "RunPod API request failed — check API key and connectivity"
    fi

    # A non-JSON body simply yields no error text here (best-effort parse).
    err=$(jq -r '.errors[0].message // empty' <<<"$response" 2>/dev/null || true)
    if [[ -n "$err" ]]; then
        die "RunPod API error: $err"
    fi

    printf '%s\n' "$response"
}

# ─── Pod queries ──────────────────────────────────────────────────────────────
# Fetch every pod in the account through gql.
#   Outputs: whatever gql prints for the `myself { pods { ... } }` query
get_pods() {
    local selection='{ myself { pods { id name desiredStatus costPerHr runtime { uptimeInSeconds } machine { gpuDisplayName } } } }'
    gql "{\"query\":\"${selection}\"}"
}

# Pick the first pod whose name contains "ollama" (case-insensitive).
#   Arguments: $1 - JSON in the shape produced by get_pods
#   Outputs:   that pod as one compact JSON line, or an empty line when nothing
#              matches or the input cannot be processed (jq errors are silenced)
find_ollama_pod() {
    local first_match
    first_match=$(
        jq -c '.data.myself.pods[] | select(.name | test("ollama"; "i"))' \
            <<<"$1" 2>/dev/null | head -n 1
    )
    printf '%s\n' "$first_match"
}

# ─── --status subcommand ──────────────────────────────────────────────────────
# --status: print a report on the account's Ollama pod and the last saved session.
#   Globals: SESSION_STATE (read), colour variables (read)
#   Outputs: the report on stdout
#   Sections:
#     1. Live pod data from the RunPod API (id, GPU, status, rate; uptime and
#        accrued cost only while the pod is RUNNING).
#     2. Ollama reachability (GET /api/tags) and the models currently loaded
#        (GET /api/ps). /api/tags lists every model stored on the pod, so it is
#        used only as the reachability probe, not for the "In VRAM" line.
#     3. The last session record from SESSION_STATE, when that file exists.
cmd_status() {
    echo ""
    echo -e "${BOLD}runpod-session status${RESET}"
    echo ""

    # ── Live pod data from API ────────────────────────────────────────────────
    local pods_json pod_json
    pods_json=$(get_pods)
    pod_json=$(find_ollama_pod "$pods_json")

    if [[ -n "$pod_json" ]]; then
        local pod_id status gpu cost_hr uptime_sec cost_so_far uptime_human
        pod_id=$(jq -r '.id' <<<"$pod_json")
        status=$(jq -r '.desiredStatus' <<<"$pod_json")
        gpu=$(jq -r '.machine.gpuDisplayName // "?"' <<<"$pod_json")
        cost_hr=$(jq -r '.costPerHr // 0' <<<"$pod_json")
        uptime_sec=$(jq -r '.runtime.uptimeInSeconds // 0' <<<"$pod_json")

        # Accrued cost = (uptime in hours) * hourly rate.
        cost_so_far=$(awk -v secs="$uptime_sec" -v rate="$cost_hr" \
            'BEGIN { printf "%.4f", (secs / 3600) * rate }')
        uptime_human=$(awk -v t="$uptime_sec" 'BEGIN {
            h = int(t / 3600); m = int((t % 3600) / 60); s = t % 60
            if (h > 0)      printf "%dh %dm %ds", h, m, s
            else if (m > 0) printf "%dm %ds", m, s
            else            printf "%ds", s
        }')

        echo -e "  Pod:      ${BOLD}${pod_id}${RESET}"
        echo -e "  GPU:      ${gpu}"
        echo -e "  Status:   ${status}"
        echo -e "  Rate:     \$${cost_hr}/hr"
        if [[ "$status" == "RUNNING" ]]; then
            echo -e "  Uptime:   ${uptime_human}"
            echo -e "  Cost:     \$${cost_so_far} this session"
        else
            echo -e "  Uptime:   —"
            echo -e "  Cost:     —"
        fi
        echo ""

        # ── Ollama reachability ───────────────────────────────────────────────
        local base="https://${pod_id}-11434.proxy.runpod.net"
        local tags_response ps_response loaded
        # Probe failures are expected while a pod is stopped or starting, hence `|| true`.
        tags_response=$(curl -s --ipv4 --max-time 5 "${base}/api/tags" 2>/dev/null || true)
        if jq -e '.models' <<<"$tags_response" > /dev/null 2>&1; then
            echo -e "  Ollama:   ${GREEN}reachable${RESET}"
            ps_response=$(curl -s --ipv4 --max-time 5 "${base}/api/ps" 2>/dev/null || true)
            if jq -e '.models' <<<"$ps_response" > /dev/null 2>&1; then
                loaded=$(jq -r '.models[].name' <<<"$ps_response" 2>/dev/null || true)
                if [[ -n "$loaded" ]]; then
                    echo -e "  In VRAM:  ${loaded//$'\n'/ }"
                else
                    echo -e "  In VRAM:  none (model will load on first request)"
                fi
            else
                echo -e "  In VRAM:  unknown (could not query /api/ps)"
            fi
        else
            echo -e "  Ollama:   ${YELLOW}not reachable${RESET}"
        fi
    else
        echo -e "  ${YELLOW}No Ollama pod found in your account.${RESET}"
    fi

    # ── Saved session state ───────────────────────────────────────────────────
    if [[ -f "$SESSION_STATE" ]]; then
        echo ""
        echo -e "  ${BOLD}Last session record:${RESET}"
        jq -r '"  Started:  \(.started_at)\n  Model:    \(.model)\n  URL:      \(.ollama_url)"' \
            "$SESSION_STATE"
    fi

    echo ""
}

# ─── Pod lifecycle ────────────────────────────────────────────────────────────
# Resume a stopped pod with GPU_COUNT GPUs via the podResume mutation.
#   Arguments: $1 - pod id
#   The API response is discarded; the function's status is gql's status.
restart_pod() {
    local target="$1" request
    log "Restarting pod ${target} ..."
    printf -v request \
        '{"query":"mutation { podResume(input: { podId: \\"%s\\", gpuCount: %s }) { id desiredStatus } }"}' \
        "$target" "$GPU_COUNT"
    gql "$request" > /dev/null
}

# Stop a pod via the podStop mutation and remind the user that storage is
# still billed while it is stopped.
#   Arguments: $1 - pod id
stop_pod() {
    local target="$1" request
    log "Stopping pod ${target} ..."
    printf -v request \
        '{"query":"mutation { podStop(input: { podId: \\"%s\\" }) { id desiredStatus } }"}' \
        "$target"
    gql "$request" > /dev/null
    ok 'Pod stopped. Storage costs continue while stopped (~$0.002/hr).'
}

# Delete a pod via the podTerminate mutation.
#   Arguments: $1 - pod id
terminate_pod() {
    local target="$1" request
    log "Terminating pod ${target} ..."
    printf -v request \
        '{"query":"mutation { podTerminate(input: { podId: \\"%s\\" }) }"}' \
        "$target"
    gql "$request" > /dev/null
    ok "Pod terminated."
}

# ─── --stop subcommand ────────────────────────────────────────────────────────
# --stop: stop the session's pod after an interactive confirmation.
#   Globals: SESSION_STATE (read, then removed after a successful stop)
#   The pod id comes from the saved state file when that file yields a usable
#   `pod_id`; otherwise (no file, unreadable JSON, or missing/null field) the pod
#   is looked up through the API. Anything other than "y"/"Y" at the prompt —
#   including end-of-input — aborts with exit status 0.
cmd_stop() {
    local pod_id="" confirm=""

    if [[ -f "$SESSION_STATE" ]]; then
        # `// empty` keeps a missing field from becoming the literal string "null";
        # `|| true` keeps a corrupt file from aborting the script under `set -e`.
        pod_id=$(jq -r '.pod_id // empty' "$SESSION_STATE" 2>/dev/null || true)
        if [[ -z "$pod_id" ]]; then
            warn "No usable pod_id in $SESSION_STATE — looking the pod up via the API."
        fi
    fi

    if [[ -z "$pod_id" ]]; then
        local pods_json pod_json
        pods_json=$(get_pods)
        pod_json=$(find_ollama_pod "$pods_json")
        if [[ -z "$pod_json" ]]; then
            die "No Ollama pod found in your account."
        fi
        pod_id=$(jq -r '.id' <<<"$pod_json")
    fi

    echo -n "  Stop pod $pod_id? [y/N] "
    # read returns non-zero at end-of-input; treat that as "no answer" instead of
    # letting `set -e` end the script without a message.
    read -r confirm || true
    if [[ "${confirm,,}" != "y" ]]; then
        log "Aborted."
        exit 0
    fi

    stop_pod "$pod_id"
    rm -f "$SESSION_STATE"
}

# ─── GPU selection ────────────────────────────────────────────────────────────
# Build the ordered list of GPU types to try when creating a pod.
#   Arguments: $1 - preferred GPU display name; matched as a case-insensitive
#                   regex against each name (empty = no preference)
#              $2 - maximum price in $/hr
#   Outputs:   compact JSON array of {id, name, vram, price} on stdout, sorted by
#              ascending price, with entries matching the preference moved to the
#              front (their relative price order is kept)
#   Exits:     non-zero when the price is not a number, the API call fails, or no
#              secure-cloud GPU is priced within the ceiling
# The caller iterates over the array and tries each entry until one succeeds.
get_gpu_candidates() {
    local preferred="$1" max_price="$2"

    # Reject a non-numeric ceiling up front; otherwise the jq filter below would
    # fail and the problem would be reported as "no GPUs within budget".
    jq -n --arg m "$max_price" '$m | tonumber' > /dev/null 2>&1 \
        || die "Invalid max price '$max_price' — expected a number such as 2.50"

    log "Querying available GPUs (secure cloud, max \$$max_price/hr) ..."

    # Go through gql so that transport and API errors are reported as such.
    # gql dies inside the substitution; `|| exit 1` propagates that failure.
    local result candidates
    result=$(gql '{"query":"{ gpuTypes { id displayName memoryInGb secureCloud lowestPrice(input: { gpuCount: 1 }) { uninterruptablePrice } } }"}') \
        || exit 1

    candidates=$(jq -c \
        --arg max "$max_price" \
        '[.data.gpuTypes[]
          | select(.secureCloud == true)
          | select((.lowestPrice.uninterruptablePrice // 0) > 0)
          | select(.lowestPrice.uninterruptablePrice <= ($max | tonumber))
          | { id: .id, name: .displayName, vram: .memoryInGb,
              price: .lowestPrice.uninterruptablePrice }
         ] | sort_by(.price)' <<<"$result" || true)

    if [[ -z "$candidates" || "$candidates" == "[]" ]]; then
        die "No GPUs on secure cloud within \$$max_price/hr. Try --max-price."
    fi

    # Bubble preferred GPU to front so it's tried first
    if [[ -n "$preferred" ]]; then
        local pcount
        pcount=$(jq --arg p "$preferred" \
            '[.[] | select((.name // "") | test($p; "i"))] | length' \
            <<<"$candidates" 2>/dev/null) || pcount=""

        if [[ -z "$pcount" ]]; then
            # jq could not evaluate the pattern; keep the plain price ordering.
            warn "Preferred GPU '$preferred' is not a valid match pattern — ignoring it."
        elif (( pcount == 0 )); then
            warn "Preferred GPU '$preferred' not in catalog within price limit. Will try all."
        else
            candidates=$(jq -c --arg p "$preferred" \
                '( [.[] | select((.name // "") | test($p; "i"))] ) +
                 ( [.[] | select((.name // "") | test($p; "i") | not)] )' \
                <<<"$candidates")
        fi
    fi

    echo "$candidates"
}

# ─── Network volume ───────────────────────────────────────────────────────────
# Resolve NETWORK_VOLUME_NAME to a RunPod network volume id.
#   Globals: NETWORK_VOLUME_NAME (read)
#   Outputs: the id on stdout; if several volumes share the name, the first one
#            returned by the API is used
#   Exits:   non-zero when the API call fails or no volume has that name
get_network_volume_id() {
    local result vol_id

    # Go through gql so an API or auth failure is reported as such instead of as
    # "volume not found". gql dies inside the substitution; `|| exit 1`
    # propagates that failure to our caller.
    result=$(gql '{"query":"{ myself { networkVolumes { id name } } }"}') || exit 1

    vol_id=$(jq -r \
        --arg n "$NETWORK_VOLUME_NAME" \
        '[.data.myself.networkVolumes[]? | select(.name == $n) | .id] | .[0] // empty' \
        <<<"$result" 2>/dev/null || true)

    if [[ -z "$vol_id" ]]; then
        die "Network volume '$NETWORK_VOLUME_NAME' not found."
    fi
    echo "$vol_id"
}

# ─── Create pod ───────────────────────────────────────────────────────────────
# Ask RunPod to deploy a new on-demand secure-cloud pod named "ollama-session".
#   Arguments: $1 - GPU type id, $2 - network volume id
#   Globals:   OLLAMA_IMAGE, GPU_COUNT, CONTAINER_DISK_GB, RUNPOD_API,
#              RUNPOD_API_KEY (read); OLLAMA_MODELS_PATH (read, optional)
#   Outputs:   the new pod id on stdout when the deployment is accepted
#   Returns:   0 on success; 1 when the API reports SUPPLY_CONSTRAINT for this
#              GPU type. Any other failure goes through die.
# The request is sent with curl directly so the error code in the response can
# be inspected here.
create_pod() {
    local gpu_id="$1" vol_id="$2"
    log "Creating pod ..."

    # Container environment. OLLAMA_MODELS is appended only when
    # OLLAMA_MODELS_PATH is set to a non-empty value.
    local pod_env='[{"key":"OLLAMA_HOST","value":"0.0.0.0"},{"key":"OLLAMA_LOAD_TIMEOUT","value":"10m"},{"key":"OLLAMA_KEEP_ALIVE","value":"1h"}]'
    if [[ -n "${OLLAMA_MODELS_PATH:-}" ]]; then
        pod_env=$(jq \
            --arg v "$OLLAMA_MODELS_PATH" \
            '. + [{"key":"OLLAMA_MODELS","value":$v}]' <<<"$pod_env")
    fi

    # GraphQL mutation text; all values travel separately as the $input variable.
    local mutation='mutation($input: PodFindAndDeployOnDemandInput!) { podFindAndDeployOnDemand(input: $input) { id } }'

    local request
    request=$(jq -n \
        --arg query     "$mutation" \
        --arg gpu_id    "$gpu_id" \
        --arg vol_id    "$vol_id" \
        --arg image     "$OLLAMA_IMAGE" \
        --argjson gpu_count   "$GPU_COUNT" \
        --argjson disk        "$CONTAINER_DISK_GB" \
        --argjson env         "$pod_env" \
        '{query: $query,
          variables: { input: {
            cloudType: "SECURE",
            gpuCount: $gpu_count,
            volumeInGb: 0,
            containerDiskInGb: $disk,
            minVcpuCount: 4,
            minMemoryInGb: 15,
            gpuTypeId: $gpu_id,
            name: "ollama-session",
            imageName: $image,
            ports: "11434/http",
            volumeMountPath: "/workspace",
            networkVolumeId: $vol_id,
            env: $env
          }}}')

    local reply
    reply=$(curl -s --ipv4 \
        -H "Content-Type: application/json" \
        -H "Authorization: Bearer $RUNPOD_API_KEY" \
        -d "$request" \
        "$RUNPOD_API")

    local new_id
    new_id=$(jq -r '.data.podFindAndDeployOnDemand.id // empty' <<<"$reply")

    if [[ -z "$new_id" ]]; then
        # No id came back: tell "no capacity for this GPU" apart from everything else.
        local failure_code
        failure_code=$(jq -r '.errors[0].extensions.code // empty' <<<"$reply")
        if [[ "$failure_code" != "SUPPLY_CONSTRAINT" ]]; then
            die "Pod creation failed. Response: $reply"
        fi
        warn "No supply for GPU $gpu_id — trying next candidate ..." >&2
        return 1
    fi

    echo "$new_id"
    return 0
}

# ─── Wait for Ollama ──────────────────────────────────────────────────────────
# Block until the pod's Ollama endpoint answers GET /api/tags with a JSON body
# containing `.models`, or until STARTUP_TIMEOUT seconds have passed.
#   Arguments: $1 - pod id
#   Globals:   STARTUP_TIMEOUT, POLL_INTERVAL (read)
#   Returns:   0 once Ollama answers; calls die on timeout
# Elapsed time comes from bash's SECONDS counter, so the time spent inside each
# probe (curl may take up to 5s) counts against the timeout as well as the sleeps.
wait_for_pod() {
    local pod_id="$1"
    local url="https://${pod_id}-11434.proxy.runpod.net"
    local started=$SECONDS elapsed=0 response
    log "Polling $url/api/tags (timeout: ${STARTUP_TIMEOUT}s) ..."
    while (( elapsed < STARTUP_TIMEOUT )); do
        # Connection errors are expected while the pod boots, hence `|| true`.
        response=$(curl -s --ipv4 --max-time 5 "${url}/api/tags" 2>/dev/null || true)
        if jq -e '.models' <<<"$response" > /dev/null 2>&1; then
            echo ""; ok "Ollama is up."; return 0
        fi
        # \r keeps the progress counter on a single terminal line.
        printf "  [%3ds] waiting...\r" "$elapsed"
        sleep "$POLL_INTERVAL"
        # Plain assignment: unlike `(( elapsed += ... ))` it cannot return a
        # non-zero status and trip `set -e`.
        elapsed=$(( SECONDS - started ))
    done
    echo ""
    die "Timed out after ${STARTUP_TIMEOUT}s. Check the RunPod dashboard."
}

# ─── Patch opencode.json ──────────────────────────────────────────────────────
#
# Point the opencode config (OPENCODE_CONFIG) at the live pod.
#
# Changes made to the file:
#   .provider.<OPENCODE_PROVIDER>.options.baseURL → the URL passed as $1
#   .model                                        → <OPENCODE_PROVIDER>/<DEFAULT_MODEL>
#   .provider.<OPENCODE_PROVIDER>.models          → every tag in WARMUP_MODELS is added
#                                                   as {"tools": true}; entries already
#                                                   in the file win on conflicts
#
# The merge is `$patch * existing` (jq recursive merge, right-hand side wins), so
# existing per-model settings are never clobbered. A .bak copy is written before
# any change. jq reads strict JSON only: if the file cannot be parsed (for
# example because it contains comments), nothing is modified and the function dies.
#
#   Arguments: $1 - full base URL including /v1
#   Globals:   OPENCODE_CONFIG, OPENCODE_PROVIDER, DEFAULT_MODEL, WARMUP_MODELS (read)
#
patch_opencode_config() {
    local new_url="$1"   # full URL including /v1

    if [[ ! -f "$OPENCODE_CONFIG" ]]; then
        die "opencode config not found at $OPENCODE_CONFIG"
    fi
    cp "$OPENCODE_CONFIG" "${OPENCODE_CONFIG}.bak"

    # Build { "model:tag": {"tools": true}, ... } for all warmup models.
    # WARMUP_MODELS is a space-separated list, so it is deliberately unquoted.
    local models_patch="{}" m
    for m in $WARMUP_MODELS; do
        models_patch=$(jq --arg m "$m" '. + {($m): {"tools": true}}' <<<"$models_patch")
    done

    # Write next to the target so the final mv stays on one filesystem.
    local tmp
    tmp=$(mktemp "${OPENCODE_CONFIG}.XXXXXX") \
        || die "Could not create a temporary file next to $OPENCODE_CONFIG"

    # Check jq explicitly: in `jq ... > tmp && mv ...` a jq failure would be
    # ignored by `set -e`, and the success message below would still be printed.
    if ! jq \
        --arg provider  "$OPENCODE_PROVIDER" \
        --arg url       "$new_url" \
        --arg model     "${OPENCODE_PROVIDER}/${DEFAULT_MODEL}" \
        --argjson patch "$models_patch" \
        '
        .provider[$provider].options.baseURL = $url
        | .model = $model
        | .provider[$provider].models = (
            $patch * (.provider[$provider].models // {})
          )
        ' "$OPENCODE_CONFIG" > "$tmp"; then
        rm -f "$tmp"
        die "Could not patch $OPENCODE_CONFIG — jq failed (it needs strict JSON without comments). File left unchanged."
    fi
    mv "$tmp" "$OPENCODE_CONFIG"

    ok "opencode.json patched:"
    log "  provider.${OPENCODE_PROVIDER}.options.baseURL = $new_url"
    log "  model = ${OPENCODE_PROVIDER}/${DEFAULT_MODEL}"
}

# ─── Patch external tool configs ─────────────────────────────────────────────
#
# Updates the Ollama host URL in:
#   TRANSART_SCRIPT    — replaces a line starting with OLLAMA_HOST = "..."
#   PUBLISHER_CONFIG   — replaces a line starting with ollama_host = "..."
# Both receive the bare URL (no /v1). A target whose variable is empty is
# skipped; a configured path that does not exist is skipped with a warning.
# A .bak copy of each file is written before it is changed.
#

# Rewrite `<key> = "..."` at the start of a line in one file.
#   $1 - file, $2 - key, $3 - new value
# sed reads the fresh backup and writes into the original path. This avoids
# `sed -i` (whose syntax differs between GNU and BSD sed) and keeps the
# original file's mode. On a sed failure the backup is copied back.
_patch_host_assignment() {
    local file="$1" key="$2" url="$3"
    cp "$file" "${file}.bak"
    if ! sed "s|^${key} = \".*\"|${key} = \"${url}\"|" "${file}.bak" > "$file"; then
        cp "${file}.bak" "$file"
        return 1
    fi
}

# Patch every configured external file with the pod URL.
#   Arguments: $1 - bare pod URL (no /v1)
#   Globals:   TRANSART_SCRIPT, PUBLISHER_CONFIG (read)
patch_external_configs() {
    local bare_url="$1"   # https://<pod-id>-11434.proxy.runpod.net  (no /v1)

    if [[ -n "${TRANSART_SCRIPT:-}" ]]; then
        if [[ ! -f "$TRANSART_SCRIPT" ]]; then
            warn "TRANSART_SCRIPT not found: $TRANSART_SCRIPT — skipping"
        elif _patch_host_assignment "$TRANSART_SCRIPT" "OLLAMA_HOST" "$bare_url"; then
            ok "transart.py patched: OLLAMA_HOST = $bare_url"
        else
            warn "Could not patch $TRANSART_SCRIPT — restored from ${TRANSART_SCRIPT}.bak"
        fi
    fi

    if [[ -n "${PUBLISHER_CONFIG:-}" ]]; then
        if [[ ! -f "$PUBLISHER_CONFIG" ]]; then
            warn "PUBLISHER_CONFIG not found: $PUBLISHER_CONFIG — skipping"
        elif _patch_host_assignment "$PUBLISHER_CONFIG" "ollama_host" "$bare_url"; then
            ok "my-publisher config patched: ollama_host = $bare_url"
        else
            warn "Could not patch $PUBLISHER_CONFIG — restored from ${PUBLISHER_CONFIG}.bak"
        fi
    fi
}

# ─── Warm up one model ────────────────────────────────────────────────────────
# Ask Ollama on the pod to load one model by sending a tiny non-streaming
# generate request.
#   Arguments: $1 - pod id, $2 - Ollama model tag
#   Globals:   WARMUP_NUM_CTX (read; falls back to 32768 when unset or not a
#              plain integer)
#   Returns:   always 0 — warmup is best-effort; a failure is only a warning
warmup_model() {
    local pod_id="$1" model="$2"
    local base="https://${pod_id}-11434.proxy.runpod.net"

    local num_ctx="${WARMUP_NUM_CTX:-32768}"
    if [[ ! "$num_ctx" =~ ^[0-9]+$ ]]; then
        num_ctx=32768
    fi

    log "Warming up '$model' into VRAM ..."

    # Build the body with jq so the model tag is always correctly JSON-escaped.
    local payload
    if ! payload=$(jq -n \
        --arg model "$model" \
        --argjson num_ctx "$num_ctx" \
        '{model: $model, prompt: "hi", stream: false, options: {num_ctx: $num_ctx}}'); then
        warn "  Warmup for '$model' failed — model will load on first use."
        return 0
    fi

    # --fail makes curl return non-zero on HTTP error statuses; without it an
    # error response (e.g. for an unknown model) would be reported as "loaded".
    if curl -s --fail --ipv4 --max-time 300 -X POST "${base}/api/generate" \
        -H "Content-Type: application/json" \
        -d "$payload" \
        > /dev/null 2>&1; then
        ok "  '$model' is loaded."
    else
        warn "  Warmup for '$model' failed — model will load on first use."
    fi
}

# ─── State persistence ────────────────────────────────────────────────────────
# Record the current session in SESSION_STATE (the file is overwritten).
#   Arguments: $1 - pod id, $2 - Ollama base URL, $3 - model tag
#   Written fields: pod_id, ollama_url, model, started_at (current UTC time,
#   formatted as YYYY-MM-DDTHH:MM:SSZ)
save_state() {
    local id="$1" endpoint="$2" tag="$3"
    local stamp
    stamp="$(date -u +%Y-%m-%dT%H:%M:%SZ)"

    jq -n \
        --arg id       "$id" \
        --arg endpoint "$endpoint" \
        --arg tag      "$tag" \
        --arg stamp    "$stamp" \
        '{ pod_id: $id, ollama_url: $endpoint, model: $tag, started_at: $stamp }' \
        > "$SESSION_STATE"
}

# ─── Main ─────────────────────────────────────────────────────────────────────
# Entry point: bring up (or reuse) an Ollama pod and wire local tools to it.
#
# Reads the OPT_* globals filled in by the top-level option parsing; it does not
# look at its own positional arguments.
#
# Flow:
#   --status / --stop are dispatched first and exit.
#   1. Unless --new was given, look for an existing pod with "ollama" in its name
#      and reuse, restart, delete, or ignore it depending on its state.
#   2. If no pod id is held after step 1, resolve the network volume, list GPU
#      candidates, and prompt for each one until a pod is created.
#   3. Wait for Ollama to answer, unless step 1 already found it reachable.
#   4-5. Build the pod URLs and patch the opencode config and external configs.
#   6. Warm up all WARMUP_MODELS (--all-models) or just OPT_MODEL.
#   7. Save the session state.  8. Print a summary.
main() {
    [[ $OPT_STATUS -eq 1 ]] && { cmd_status; exit 0; }
    [[ $OPT_STOP   -eq 1 ]] && { cmd_stop;   exit 0; }

    echo -e "${BOLD}runpod-session${RESET} — Ollama on RunPod → opencode"
    echo ""

    # pod_id stays empty until an existing pod is adopted or a new one is created.
    local pod_id=""
    # Set to 1 when an existing pod is already answering, so step 3 is skipped.
    local _skip_wait=0

    # ── 1. Existing pod check ─────────────────────────────────────────────────
    if [[ $OPT_FORCE_NEW -eq 0 ]]; then
        log "Checking for existing Ollama pods ..."
        local pods_json pod_json
        pods_json=$(get_pods)
        pod_json=$(find_ollama_pod "$pods_json")

        if [[ -n "$pod_json" ]]; then
            pod_id=$(echo "$pod_json" | jq -r '.id')
            local status gpu cost
            status=$(echo "$pod_json" | jq -r '.desiredStatus')
            gpu=$(echo "$pod_json"    | jq -r '.machine.gpuDisplayName // "?"')
            cost=$(echo "$pod_json"   | jq -r '.costPerHr // "?"')

            echo -e "  Found: ${BOLD}${pod_id}${RESET}  GPU: ${gpu}  \$${cost}/hr  Status: ${status}"
            echo ""

            case "$status" in
                RUNNING)
                    # Probe Ollama once; if it is not answering yet, fall through
                    # to the normal wait in step 3.
                    local _check
                    _check=$(curl -s --ipv4 --max-time 5                         "https://${pod_id}-11434.proxy.runpod.net/api/tags" 2>/dev/null || true)
                    if echo "$_check" | jq -e '.models' > /dev/null 2>&1; then
                        ok "Already running and reachable — skipping startup sequence."
                        _skip_wait=1
                    else
                        log "Pod is running but Ollama not yet reachable — waiting ..."
                    fi
                    ;;
                EXITED|STOPPED)
                    # Any answer other than d/a (including just Enter) restarts the pod.
                    echo -n "  [R]estart  [D]elete and create new  [A]bort  [R/d/a]: "
                    read -r choice
                    case "${choice,,}" in
                        d) terminate_pod "$pod_id"; pod_id="" ;;
                        a) log "Aborted."; exit 0 ;;
                        *) restart_pod "$pod_id" ;;
                    esac
                    ;;
                *)
                    # Clearing pod_id sends the flow into step 2 (create a new pod).
                    warn "Unexpected pod state '$status' — ignoring this pod."
                    pod_id=""
                    ;;
            esac
        else
            log "No existing Ollama pod found."
        fi
    fi

    # ── 2. Create new pod if needed ───────────────────────────────────────────
    if [[ -z "$pod_id" ]]; then
        local vol_id candidates gpu_json gpu_id gpu_name gpu_vram gpu_price
        vol_id=$(get_network_volume_id)
        ok "Volume: ${NETWORK_VOLUME_NAME} ($vol_id)"

        candidates=$(get_gpu_candidates "$OPT_GPU_TYPE" "$OPT_MAX_PRICE")
        local count
        count=$(echo "$candidates" | jq 'length')
        [[ "$count" -eq 0 ]] && die "No GPU candidates found."

        log "${count} GPU options within budget. Will prompt for each."

        # Walk the candidate array in order, asking before each attempt.
        local i=0
        while [[ $i -lt $count ]]; do
            gpu_json=$(echo "$candidates" | jq -c --argjson i "$i" '.[$i]')
            gpu_id=$(echo "$gpu_json"    | jq -r '.id')
            gpu_name=$(echo "$gpu_json"  | jq -r '.name')
            gpu_vram=$(echo "$gpu_json"  | jq -r '.vram')
            gpu_price=$(echo "$gpu_json" | jq -r '.price')

            echo ""
            echo -e "  ${BOLD}${gpu_name}${RESET}  ${gpu_vram}GB VRAM  \$${gpu_price}/hr"
            echo -n "  Create pod with this GPU? [Y/n/a(bort)] "
            read -r choice
            case "${choice,,}" in
                a) log "Aborted."; exit 0 ;;
                # (( i++ )) returns status 1 when the old value is 0; `|| true`
                # keeps `set -e` from ending the script on that.
                n) (( i++ )) || true; continue ;;
            esac

            # create_pod runs in a command-substitution subshell, so a `die`
            # inside it ends only that subshell: any non-zero status here simply
            # moves on to the next candidate.
            if pod_id=$(create_pod "$gpu_id" "$vol_id"); then
                ok "Pod created: $pod_id  GPU: ${gpu_name}"
                break
            fi
            (( i++ )) || true
        done

        [[ -z "$pod_id" ]] && die "All ${count} GPU candidates exhausted. Try --max-price or later."
    fi

    # ── 3. Wait for Ollama if needed ─────────────────────────────────────────
    if [[ "${_skip_wait:-0}" != "1" ]]; then
        wait_for_pod "$pod_id"
    fi

    # ── 4. Final URL ──────────────────────────────────────────────────────────
    local ollama_url="https://${pod_id}-11434.proxy.runpod.net/v1"

    # ── 5. Patch opencode.json ────────────────────────────────────────────────
    patch_opencode_config "$ollama_url"

    # ── 5b. Patch external tool configs (transart, my-publisher) ─────────────
    # These tools get the URL without the /v1 suffix.
    local bare_url="https://${pod_id}-11434.proxy.runpod.net"
    patch_external_configs "$bare_url"

    # ── 6. Warmup ─────────────────────────────────────────────────────────────
    if [[ $OPT_ALL_MODELS -eq 1 ]]; then
        # WARMUP_MODELS is a space-separated list, hence the unquoted expansion.
        for m in $WARMUP_MODELS; do
            warmup_model "$pod_id" "$m"
        done
    elif [[ -n "$OPT_MODEL" ]]; then
        warmup_model "$pod_id" "$OPT_MODEL"
    else
        warn "No warmup requested. Use --model MODEL or --all-models."
    fi

    # ── 7. Save state ─────────────────────────────────────────────────────────
    # The recorded model is always OPT_MODEL, also when --all-models was used.
    save_state "$pod_id" "$ollama_url" "$OPT_MODEL"

    # ── 8. Done ───────────────────────────────────────────────────────────────
    echo ""
    echo -e "${BOLD}${GREEN}Ready.${RESET}"
    printf "  Pod:   %s\n" "$pod_id"
    printf "  URL:   %s\n" "$ollama_url"
    printf "  Run:   opencode\n"
    echo ""
}

# Options were already parsed into OPT_* at top level; the arguments are passed
# along, but main does not read them.
main "$@"