#!/bin/bash
# mkwheels — build a reproducible, pinned Python wheels tarball for a package.
#
# Copyright (C) 2026 Danilo M. <danix@danix.xyz>
#
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License version 2 as published by
# the Free Software Foundation.
#
# This program is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License along with
# this program; if not, see <https://www.gnu.org/licenses/>.
# Strict mode: abort on a failing command (-e) or an unset variable (-u), and
# let any failing pipeline stage fail the whole pipeline, so a tar/find
# failure can't be masked by gzip succeeding.
set -euo pipefail

# Print the command-line help text on stdout. Callers that report a usage
# error redirect it to stderr themselves.
usage() {
    # Basename of the invoked script, shown in both synopsis lines.
    local prog=${0##*/}
    cat <<EOF
usage: $prog pypi --name PKG --ver VER [--epoch N]
       $prog gh   --repo OWNER/REPO --ver VER [--name PKG] [--tag TAG] [--epoch N]

Build a reproducible pinned Python wheels tarball <name>-wheels-<ver>.tar.gz
plus a hashed requirements.txt, for vendoring into a SlackBuild.

Modes:
  pypi   Vendor a PyPI package. Epoch auto-derived from the PyPI upload time.
  gh     Vendor a GitHub source release (resolves PyPI + git deps via pip).
         Epoch auto-derived from the GitHub release published_at.

Flags:
  --name PKG        Package name for the output filename.
                    gh: defaults to the repo basename, lowercased.
  --ver VER         Version for the output filename. A leading 'v' is stripped.
  --repo OWNER/REPO GitHub repository (gh mode only).
  --tag TAG         Git tag to fetch (gh mode). Leading 'v' stripped for naming;
                    the real ref is resolved by trying <tag> then v<tag>.
                    Defaults to the normalized --ver.
  --epoch N         SOURCE_DATE_EPOCH for the tarball mtime. Overrides the
                    auto-derived value.

  OUTPUT            env var: output directory (default: current dir).

Requires: python3+pip, jq, curl, tar, gzip, md5sum.
EOF
}

# Report a fatal error on stderr, prefixed with "error: ", then exit with
# status 1. All arguments are joined into a single message.
die() {
    printf 'error: %s\n' "$*" >&2
    exit 1
}

# ---- parse args -----------------------------------------------------------

# The first positional argument selects the mode. Anything other than a known
# mode or a help flag prints the usage text to stderr and exits with status 2.
mode=${1:-}
case "$mode" in
    pypi|gh) shift ;;
    -h|--help) usage; exit 0 ;;
    *) usage >&2; exit 2 ;;
esac

# Abort with a clear message unless the flag in $1 is followed by a value.
# Called as `require_value "$@"` from the option loop. Without this check a
# trailing flag makes `shift 2` fail, and `set -e` then ends the script with
# status 1 and no diagnostic at all.
require_value() {
    [ $# -ge 2 ] || die "option $1 requires a value"
}

# Every remaining argument is a "--flag VALUE" pair. Unknown arguments are a
# usage error (exit 2); -h/--help is honoured anywhere on the command line.
name=""; ver=""; repo=""; tag=""; epoch=""
while [ $# -gt 0 ]; do
    case "$1" in
        --name)  require_value "$@"; name=$2;  shift 2 ;;
        --ver)   require_value "$@"; ver=$2;   shift 2 ;;
        --repo)  require_value "$@"; repo=$2;  shift 2 ;;
        --tag)   require_value "$@"; tag=$2;   shift 2 ;;
        --epoch) require_value "$@"; epoch=$2; shift 2 ;;
        -h|--help) usage; exit 0 ;;
        *) usage >&2; exit 2 ;;
    esac
done

# Print $1 with at most one leading 'v' removed. No trailing newline is
# written, so the result can be captured as-is with $(...).
strip_v() {
    local raw=$1
    printf '%s' "${raw#v}"
}

# --ver is mandatory in both modes. Normalize it once here so the pip spec,
# the default tag and the output filename all see the 'v'-less form.
if [ -z "$ver" ]; then
    die "--ver is required"
fi
ver=$(strip_v "$ver")

# Output directory: $OUTPUT when set and non-empty, else the current directory.
OUTPUT=${OUTPUT:-$PWD}

# Fail early, before any network access, if an external program is missing.
for cmd in python3 jq curl tar gzip md5sum; do
    if ! command -v "$cmd" >/dev/null 2>&1; then
        die "required tool not found: $cmd"
    fi
done
# python3 alone is not enough; the pip module must be importable too.
if ! python3 -m pip --version >/dev/null 2>&1; then
    die "python3 pip module not available"
fi

# ---- mode-specific resolution ---------------------------------------------
# Both branches hand the same things to the shared tail of the script: the
# final $name, an $epoch (derived here unless --epoch was given), and
# $pip_action / $pip_spec, which say how pip should populate the wheels dir.

# Scratch directory for everything below; removed on every exit path.
work=$(mktemp -d)
trap 'rm -rf "$work"' EXIT

case "$mode" in
    pypi)
        if [ -z "$name" ]; then
            die "pypi mode: --name is required"
        fi
        # A pinned PyPI release is fetched as published via `pip download`.
        pip_action=download
        pip_spec="$name==$ver"

        if [ -z "$epoch" ]; then
            # No --epoch: use the earliest upload time among the release files.
            if ! pypi_json=$(curl -fsSL "https://pypi.org/pypi/$name/$ver/json"); then
                die "cannot fetch PyPI metadata for $name $ver"
            fi
            uploaded=$(printf '%s' "$pypi_json" |
                jq -r '[.urls[].upload_time_iso_8601] | sort | .[0] // empty')
            if [ -z "$uploaded" ]; then
                die "no upload time found for $name $ver on PyPI"
            fi
            epoch=$(date -u -d "$uploaded" +%s)
            echo "warning: epoch not given; using PyPI upload time $uploaded (epoch $epoch)" >&2
        fi
        ;;
    *)
        # gh mode (the only other value the mode check lets through).
        if [ -z "$repo" ]; then
            die "gh mode: --repo OWNER/REPO is required"
        fi
        if [ -z "$name" ]; then
            # Default package name: repo basename, lowercased.
            name=$(printf '%s' "${repo##*/}" | tr '[:upper:]' '[:lower:]')
        fi
        # Tag defaults to the (already normalized) version; either way it is
        # held without a leading 'v'.
        tag=$(strip_v "${tag:-$ver}")

        # Find the real ref by asking the releases API for <tag>, then v<tag>.
        # curl's stderr is discarded because a miss on the first candidate is
        # expected and not an error.
        ref=""; release_json=""
        for candidate in "$tag" "v$tag"; do
            release_url="https://api.github.com/repos/$repo/releases/tags/$candidate"
            if release_json=$(curl -fsSL "$release_url" 2>/dev/null); then
                ref=$candidate
                break
            fi
        done
        if [ -z "$ref" ]; then
            die "no GitHub release found for $repo tag $tag (or v$tag)"
        fi

        if [ -z "$epoch" ]; then
            # No --epoch: use the release's publication timestamp.
            published=$(printf '%s' "$release_json" | jq -r '.published_at // empty')
            if [ -z "$published" ]; then
                die "no published_at for $repo release $ref"
            fi
            epoch=$(date -u -d "$published" +%s)
            echo "warning: epoch not given; using GitHub published_at $published (epoch $epoch)" >&2
        fi

        # Fetch the tagged source archive and unpack it under $work/src.
        src_tgz="$work/src.tar.gz"
        if ! curl -fsSL "https://github.com/$repo/archive/refs/tags/$ref.tar.gz" \
                -o "$src_tgz"; then
            die "cannot fetch source tarball for $repo $ref"
        fi
        mkdir -p "$work/src"
        tar -xzf "$src_tgz" -C "$work/src"
        # GitHub archives unpack to a single top-level dir; take the first
        # directory found directly under $work/src.
        src_dir=$(find "$work/src" -mindepth 1 -maxdepth 1 -type d | head -n 1)
        if [ -z "$src_dir" ]; then
            die "unexpected source archive layout"
        fi
        # A local source dir must be *built* to a wheel; `pip download <dir>`
        # only resolves metadata and leaves the project itself unmaterialized.
        # `pip wheel` builds the project and all its deps into the wheels dir.
        pip_action=wheel
        pip_spec="$src_dir"
        ;;
esac

# Exported so every child process started below inherits the epoch; the same
# value is read back later as the mtime for the tarball entries.
export SOURCE_DATE_EPOCH="$epoch"
printf '%s\n' "mkwheels: $name $ver -> $OUTPUT/$name-wheels-$ver.tar.gz"

# ---- shared: resolve tree, emit requirements, pack tarball ----------------

# Everything pip fetches or builds lands here and is later archived as the
# top-level 'wheels/' directory of the tarball.
wheels="$work/wheels"
mkdir -p "$wheels"

# Throwaway virtualenv so host pip config / installed pkgs don't leak in.
python3 -m venv "$work/venv"
venv_pip="$work/venv/bin/pip"
"$venv_pip" install --quiet --upgrade pip wheel >/dev/null

# Resolve the full dependency tree into $wheels.
#   download: fetch the pinned PyPI release and its dependencies.
#   wheel:    build the local source tree and its dependencies to wheels.
# NOTE(review): `pip download` is not asked to build anything here, so a
# dependency published only as an sdist presumably lands in $wheels as an
# sdist -- the requirements step's filename parsing allows for that. Confirm.
case "$pip_action" in
    download) "$venv_pip" download --dest "$wheels" "$pip_spec" ;;
    *)        "$venv_pip" wheel --wheel-dir "$wheels" "$pip_spec" ;;
esac

# Emit a pinned, hashed requirements.txt from the files pip left in $wheels:
# one "name==version --hash=sha256:<digest>" line per file.
#
# Name and version are recovered from the filename, by file type:
#   wheel  name-version[-build]-python-abi-platform.whl
#          The name field has '-' escaped to '_', so the first two
#          '-'-separated fields are always name and version.
#   sdist  name-version.<ext>
#          Older sdists keep '-' inside the name (foo-bar-1.2.tar.gz), so the
#          split must be on the LAST '-': splitting on the first one would
#          pin "foo==bar". The archive extension is stripped first so that
#          .zip sdists do not leak ".zip" into the version.
# NOTE(review): an sdist whose version itself contains '-' is ambiguous by
# filename alone and would still be mis-split; this is assumed not to occur.
req="$work/requirements.txt"
: > "$req"
for f in "$wheels"/*; do
    # An unmatched glob is left as the literal pattern: nothing was produced.
    [ -e "$f" ] || die "no distribution files found in $wheels"
    base=${f##*/}
    case "$base" in
        *-*.whl)
            fname=${base%%-*}
            rest=${base#*-}
            fver=${rest%%-*}
            ;;
        *-*.tar.gz|*-*.tar.bz2|*-*.tar.xz|*-*.zip)
            stem=$base
            for ext in .tar.gz .tar.bz2 .tar.xz .zip; do
                stem=${stem%"$ext"}
            done
            fname=${stem%-*}
            fver=${stem##*-}
            ;;
        *)
            # A wrong pin is worse than no pin: refuse to guess.
            die "unrecognized distribution file: $base"
            ;;
    esac
    hash=$(python3 -c "import hashlib,sys;print(hashlib.sha256(open(sys.argv[1],'rb').read()).hexdigest())" "$f")
    printf '%s==%s --hash=sha256:%s\n' "$fname" "$fver" "$hash" >> "$req"
done
# Byte-wise sort so the line order does not depend on the caller's locale,
# matching the LC_ALL=C ordering used for the tarball entries.
LC_ALL=C sort -o "$req" "$req"

mkdir -p "$OUTPUT"
tarball="$OUTPUT/$name-wheels-$ver.tar.gz"

# Reproducible archive: sorted entries, normalized ownership/mtime/mode,
# gzip -n (no name or timestamp in the gzip header).
# Run from $work so the archive holds a top-level 'wheels/' dir.
#
# The output redirection sits on the subshell as a whole, so "$tarball" is
# opened in the caller's working directory *before* the `cd`. With the
# redirect inside, a relative $OUTPUT (e.g. OUTPUT=out) would be resolved
# under $work and fail with "No such file or directory".
#
# --mode pins permission bits in the archive (files 0644, or 0755 if they
# were executable; directories 0755) so the result does not vary with the
# umask in effect while pip wrote the files.
(
    cd "$work" || exit 1
    find wheels -print0 | LC_ALL=C sort -z \
        | tar --no-recursion --null --files-from=- \
              --mtime="@$SOURCE_DATE_EPOCH" \
              --owner=0 --group=0 --numeric-owner \
              --mode='go=rX,u+rw,a-s' \
              -cf - \
        | gzip -n
) > "$tarball"

# Ship the hashed requirements next to the tarball.
cp "$work/requirements.txt" "$OUTPUT/requirements.txt"

# Summary for the operator; only the hash field of md5sum's output is kept.
md5=$(md5sum "$tarball" | cut -d' ' -f1)
echo "wheels tarball: $tarball"
echo "requirements:   $OUTPUT/requirements.txt"
echo "epoch:          $SOURCE_DATE_EPOCH"
echo "md5sum:         $md5"
