From d71a10b8a10e04d9a1cd5683034f3f94d4a81a3a Mon Sep 17 00:00:00 2001 From: "Danilo M." Date: Fri, 26 Jun 2026 13:33:50 +0200 Subject: mkwheels: add gh source mode (pypi/gh subcommands) Vendor GitHub source releases that are not on PyPI (e.g. NetExec, which also pulls git deps). New flag-based CLI with pypi/gh mode selectors: mkwheels pypi --name PKG --ver VER [--epoch N] mkwheels gh --repo OWNER/REPO --ver VER [--name PKG] [--tag TAG] [--epoch N] gh mode downloads the tagged source and uses `pip wheel` to build the project plus its whole dependency tree (PyPI + git deps) into wheels; `pip download <dir>` is wrong for a local source since it only resolves metadata. Epoch auto-derives from the release published_at. selftest now covers both modes. Co-Authored-By: Claude Opus 4.8 --- CLAUDE.md | 54 ++++--- README.md | 55 +++++-- .../2026-06-26-mkwheels-gh-source-mode-design.md | 18 +-- mkwheels | 171 +++++++++++++++------ selftest | 43 ++++-- 5 files changed, 237 insertions(+), 104 deletions(-) diff --git a/CLAUDE.md b/CLAUDE.md index c93c237..a20247a 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -10,7 +10,7 @@ pattern, applied to Python). ``` mkwheels # the whole CLI (single-file bash) -selftest # reproducibility check (builds six twice, asserts md5 match) +selftest # reproducibility check (both modes, asserts md5 match) LICENSE # GPLv2 full text README.md # user-facing usage + rationale docs/superpowers/ # design spec + implementation plan @@ -21,35 +21,46 @@ outgrows one file. ## Invocation +Two subcommands; all options are explicit flags, no positionals. + ``` -mkwheels <pkg> <ver> [epoch] +mkwheels pypi --name PKG --ver VER [--epoch N] +mkwheels gh --repo OWNER/REPO --ver VER [--name PKG] [--tag TAG] [--epoch N] ``` -- `<pkg> <ver>` — PyPI package and exact version. -- `[epoch]` — optional `SOURCE_DATE_EPOCH`. Omitted → auto-derived from the - PyPI release upload time (earliest file's `upload_time_iso_8601`), with a - warning. Pass it explicitly to override. 
+- `--ver` / `--tag` strip a single leading `v`; the output version is always + without `v`. Output: `<name>-wheels-<ver>.tar.gz` + `requirements.txt`. +- `--epoch` optional in both modes; omitted → auto-derived (with a warning): + - `pypi`: earliest file's `upload_time_iso_8601` from the PyPI JSON. + - `gh`: the GitHub release `published_at` for the tag. +- `gh` defaults: `--name` = repo basename lowercased; `--tag` = normalized + `--ver`; the real ref is resolved by trying `<tag>` then `v<tag>`. - `OUTPUT` env var — output dir (default: `$PWD`). -Outputs `<pkg>-wheels-<ver>.tar.gz` + `requirements.txt`, prints md5 + epoch. - ## How it works -1. Arg parse + required-tool check (`python3`+pip, `jq`, `curl`, `tar`, `gzip`, - `md5sum`). -2. Resolve `SOURCE_DATE_EPOCH` (explicit arg, else PyPI JSON via `jq`). -3. Throwaway venv + `pip download <pkg>==<ver>` into `wheels/`. -4. Emit pinned + hashed `requirements.txt` (audit record only, not the install +1. Arg parse (mode selector + flags) + required-tool check (`python3`+pip, + `jq`, `curl`, `tar`, `gzip`, `md5sum`). +2. Mode resolution sets name, epoch, and how `wheels/` is populated: + - `pypi`: epoch from PyPI JSON; `pip download <name>==<ver>` (pre-built + wheels, deterministic). + - `gh`: resolve release ref + `published_at`; download+unpack the tagged + source; `pip wheel <dir>` builds the project **and all deps** (PyPI + + `git+` deps) to wheels. `pip download <dir>` is wrong here — it only + resolves metadata and leaves the local project unmaterialized. +3. Emit pinned + hashed `requirements.txt` (audit record only, not the install input). -5. Pack a byte-reproducible `.tar.gz`: sorted entries, `--mtime=@epoch`, +4. Pack a byte-reproducible `.tar.gz`: sorted entries, `--mtime=@epoch`, `--owner=0 --group=0 --numeric-owner`, `gzip -n`. ## Reproducibility -This is the whole point. The same `<pkg> <ver> [epoch]` MUST yield a -byte-identical tarball. The tar normalization (step 5) plus `set -o pipefail` -(so a `tar` failure can't be masked by `gzip` exiting 0) are what guarantees -it. 
+This is the whole point. The same inputs + epoch MUST yield a byte-identical +tarball. The tar normalization (step 4) plus `set -o pipefail` (so a `tar` +failure can't be masked by `gzip` exiting 0) are what guarantees it. In `gh` +mode the project is built from source, so reproducibility holds per-machine +(build once on the target platform, upload, pin md5); wheels with compiled +extensions may differ across toolchains. **Git-sourced deps** (packages whose upstream pins a git URL, e.g. NetExec's impacket) are frozen at download time: `pip download` resolves whatever is @@ -66,9 +77,10 @@ current, and the tarball, once built, is the source of truth. The ## Testing -`./selftest` — builds `six` twice with a fixed epoch and asserts the two -tarballs are byte-identical. Run it after any change to the tar/packing logic. -Needs network (pypi.org). No test framework. +`./selftest` — builds twice with a fixed epoch in both modes (`pypi` six, +`gh` pyparsing) and asserts each pair of tarballs is byte-identical. Run it +after any change to the tar/packing or mode-resolution logic. Needs network +(pypi.org, github.com). No test framework. ## Maintainer diff --git a/README.md b/README.md index 8823a3b..530a85c 100644 --- a/README.md +++ b/README.md @@ -1,20 +1,42 @@ # mkwheels Build a reproducible, pinned Python wheels tarball for vendoring into a -SlackBuild (or any offline `pip install`). Generic over package + version. +SlackBuild (or any offline `pip install`). Generic over package + version, with +two source modes: PyPI packages and GitHub source releases. ## Usage ``` -mkwheels <pkg> <ver> [epoch] +mkwheels pypi --name PKG --ver VER [--epoch N] +mkwheels gh --repo OWNER/REPO --ver VER [--name PKG] [--tag TAG] [--epoch N] ``` -- `<pkg> <ver>` — the PyPI package and exact version to vendor. -- `[epoch]` — optional `SOURCE_DATE_EPOCH`. Omitted → auto-derived from the - PyPI release upload time (a warning is printed). Pass it to override. 
+Common flags: + +- `--ver VER` — version for the output filename. A leading `v` is stripped. +- `--epoch N` — optional `SOURCE_DATE_EPOCH`. Omitted → auto-derived (see each + mode). Pass it to override. - `OUTPUT` env var overrides the output directory (default: current dir). -Outputs `<pkg>-wheels-<ver>.tar.gz` and `requirements.txt` (pinned + hashed). +### pypi mode + +- `--name PKG` — the PyPI package, downloaded at exactly `--ver`. +- Epoch auto-derived from the PyPI release upload time. + +### gh mode + +For packages not on PyPI (GitHub source, possibly with git dependencies). The +tagged source is downloaded and pip builds the project plus its whole +dependency tree (PyPI deps and any `git+` deps) into wheels. + +- `--repo OWNER/REPO` — the GitHub repository. +- `--name PKG` — output name; defaults to the repo basename, lowercased. +- `--tag TAG` — git tag to fetch; a leading `v` is stripped for naming, and the + real ref is resolved by trying `<tag>` then `v<tag>`. Defaults to `--ver`. +- Epoch auto-derived from the GitHub release `published_at` (the repo must + publish a GitHub Release for the tag). + +Outputs `<name>-wheels-<ver>.tar.gz` and `requirements.txt` (pinned + hashed). Prints the md5sum and the resolved epoch. The `requirements.txt` is an audit record of the resolved versions, not the install input: the SlackBuild installs straight from the wheel files (`--find-links`), it does not re-resolve the @@ -30,21 +52,28 @@ PyPI releases are immutable, so the wheel set for a fixed version is deterministic. The tarball normalizes tar metadata (sorted entries, fixed mtime/owner, `gzip -n`) so it is byte-identical for the same inputs + epoch. -Git-sourced dependencies (packages whose upstream pins a git URL) are frozen -at download time: `pip download` resolves whatever is current, and the emitted -`requirements.txt` records the exact resolved versions. Once built, the -tarball is the source of truth. 
+With a fixed epoch this is byte-identical on the same machine, which is what +vendoring needs: build once, upload, pin the md5. Wheels containing compiled +extensions may differ across machines/toolchains, so build the vendored tarball +on the target platform. + +Git-sourced dependencies (packages whose upstream pins a git URL) are frozen at +build time: pip resolves whatever is current, and the emitted `requirements.txt` +records the exact resolved versions. Once built, the tarball is the source of +truth. ## SBo integration -Run `mkwheels `, upload the tarball to your package host, and set +Run `mkwheels`, upload the tarball to your package host, and set `DOWNLOAD_x86_64` / `MD5SUM_x86_64` in the SlackBuild `.info` to point at it. The SlackBuild then `pip install --no-index --find-links=` into a venv. ## Test -`./selftest` builds `six` twice with a fixed epoch and asserts the two wheels -tarballs are byte-identical. Run it after changing the tar/packing logic. +`./selftest` builds twice with a fixed epoch in both modes (`pypi` six, +`gh` pyparsing) and asserts each pair of wheels tarballs is byte-identical. Run +it after changing the tar/packing or mode-resolution logic. ## License diff --git a/docs/superpowers/specs/2026-06-26-mkwheels-gh-source-mode-design.md b/docs/superpowers/specs/2026-06-26-mkwheels-gh-source-mode-design.md index f2b6a7a..ecf3ae6 100644 --- a/docs/superpowers/specs/2026-06-26-mkwheels-gh-source-mode-design.md +++ b/docs/superpowers/specs/2026-06-26-mkwheels-gh-source-mode-design.md @@ -22,8 +22,8 @@ breaking change to the current positional interface; acceptable because the only consumer (the netexec SlackBuild) is not yet written. 
``` -mkwheels --pypi --name PKG --ver VER [--epoch N] -mkwheels --gh --repo OWNER/REPO --ver VER [--name PKG] [--tag TAG] [--epoch N] +mkwheels pypi --name PKG --ver VER [--epoch N] +mkwheels gh --repo OWNER/REPO --ver VER [--name PKG] [--tag TAG] [--epoch N] ``` ### Normalization @@ -32,13 +32,13 @@ mkwheels --gh --repo OWNER/REPO --ver VER [--name PKG] [--tag TAG] [--epoch N] - The normalized version is what appears in the output filename, always without a leading `v`: `<name>-wheels-<ver>.tar.gz`. -### `--gh` defaults +### `gh` defaults - `--name` → repo basename, lowercased (e.g. `Pennyw0rth/NetExec` → `netexec`). - `--tag` → the normalized `--ver`. - `--epoch` → auto-derived from the GitHub release `published_at` (below). -## `--gh` flow +## `gh` flow 1. **Resolve the release / ref and epoch.** GET `https://api.github.com/repos/<repo>/releases/tags/<tag>`. @@ -57,16 +57,16 @@ mkwheels --gh --repo OWNER/REPO --ver VER [--name PKG] [--tag TAG] [--epoch N] project's metadata, resolves PyPI deps, and clones+builds the git deps into wheels. This is the only step that differs from PyPI mode. -4. **Emit outputs (shared with `--pypi`).** +4. **Emit outputs (shared with `pypi`).** Generate the pinned, hashed `requirements.txt` from the wheels dir, pack the normalized reproducible tarball, print epoch + md5. Identical to the current path. -## `--pypi` flow +## `pypi` flow Unchanged behavior from the current tool: resolve `<name>==<ver>` via `pip download`, auto-derive epoch from PyPI `upload_time_iso_8601` when -`--epoch` is omitted. Only the surface changes: gated behind the `--pypi` +`--epoch` is omitted. Only the surface changes: gated behind the `pypi` selector and switched from positionals to `--name` / `--ver` / `--epoch`. ## Shared internals @@ -80,8 +80,8 @@ input — the SlackBuild installs from the wheels via `--no-index --find-links`. ## Selftest 
Add a `--gh` reproducibility check against a small, +Keep the existing `pypi` reproducibility check (two builds at a fixed epoch +must be byte-identical). Add a `gh` reproducibility check against a small, pure-Python, GitHub-tagged package so the run stays fast. Two builds at a fixed epoch must be byte-identical. diff --git a/mkwheels b/mkwheels index 0de2350..2808462 100755 --- a/mkwheels +++ b/mkwheels @@ -18,68 +18,140 @@ set -o pipefail # so a tar/find failure can't be masked by gzip succeeding usage() { cat < [epoch] +usage: ${0##*/} pypi --name PKG --ver VER [--epoch N] + ${0##*/} gh --repo OWNER/REPO --ver VER [--name PKG] [--tag TAG] [--epoch N] -Build a reproducible pinned Python wheels tarball -wheels-.tar.gz +Build a reproducible pinned Python wheels tarball -wheels-.tar.gz plus a hashed requirements.txt, for vendoring into a SlackBuild. - PyPI package name and exact version to vendor. - [epoch] SOURCE_DATE_EPOCH for the tarball mtime. Omitted -> auto-derived - from the PyPI release upload time (a warning is printed). +Modes: + pypi Vendor a PyPI package. Epoch auto-derived from the PyPI upload time. + gh Vendor a GitHub source release (resolves PyPI + git deps via pip). + Epoch auto-derived from the GitHub release published_at. - OUTPUT env var: output directory (default: current dir). +Flags: + --name PKG Package name for the output filename. + gh: defaults to the repo basename, lowercased. + --ver VER Version for the output filename. A leading 'v' is stripped. + --repo OWNER/REPO GitHub repository (gh mode only). + --tag TAG Git tag to fetch (gh mode). Leading 'v' stripped for naming; + the real ref is resolved by trying then v. + Defaults to the normalized --ver. + --epoch N SOURCE_DATE_EPOCH for the tarball mtime. Overrides the + auto-derived value. + + OUTPUT env var: output directory (default: current dir). Requires: python3+pip, jq, curl, tar, gzip, md5sum. 
EOF } -case "${1:-}" in +die() { echo "error: $*" >&2; exit 1; } + +# ---- parse args ----------------------------------------------------------- + +mode=${1:-} +case "$mode" in + pypi|gh) shift ;; -h|--help) usage; exit 0 ;; + *) usage >&2; exit 2 ;; esac -[ $# -ge 2 ] && [ $# -le 3 ] || { usage >&2; exit 2; } -pkg=$1 -ver=$2 -epoch=${3:-} +name=""; ver=""; repo=""; tag=""; epoch="" +while [ $# -gt 0 ]; do + case "$1" in + --name) name=${2:-}; shift 2 ;; + --ver) ver=${2:-}; shift 2 ;; + --repo) repo=${2:-}; shift 2 ;; + --tag) tag=${2:-}; shift 2 ;; + --epoch) epoch=${2:-}; shift 2 ;; + -h|--help) usage; exit 0 ;; + *) usage >&2; exit 2 ;; + esac +done + +# Strip a single leading 'v' from version-like strings. +strip_v() { printf '%s' "${1#v}"; } + +[ -n "$ver" ] || die "--ver is required" +ver=$(strip_v "$ver") + OUTPUT=${OUTPUT:-$PWD} # Check required tools up front. for tool in python3 jq curl tar gzip md5sum; do - command -v "$tool" >/dev/null 2>&1 || { - echo "error: required tool not found: $tool" >&2 - exit 1 - } + command -v "$tool" >/dev/null 2>&1 || die "required tool not found: $tool" done -python3 -m pip --version >/dev/null 2>&1 || { - echo "error: python3 pip module not available" >&2 - exit 1 -} +python3 -m pip --version >/dev/null 2>&1 || die "python3 pip module not available" -echo "mkwheels: $pkg $ver -> $OUTPUT/$pkg-wheels-$ver.tar.gz" - -# Resolve SOURCE_DATE_EPOCH. Explicit arg wins; otherwise derive it from the -# earliest file upload time of this version on PyPI (a real, reproducible, -# release-tied timestamp). 
-if [ -z "$epoch" ]; then - meta=$(curl -fsSL "https://pypi.org/pypi/$pkg/$ver/json") || { - echo "error: cannot fetch PyPI metadata for $pkg $ver" >&2 - exit 1 - } - iso=$(printf '%s' "$meta" \ - | jq -r '[.urls[].upload_time_iso_8601] | sort | .[0] // empty') - [ -n "$iso" ] || { - echo "error: no upload time found for $pkg $ver on PyPI" >&2 - exit 1 - } - epoch=$(date -u -d "$iso" +%s) - echo "warning: epoch not given; using PyPI upload time $iso (epoch $epoch)" >&2 -fi -export SOURCE_DATE_EPOCH="$epoch" +# ---- mode-specific resolution --------------------------------------------- +# Each mode sets: name (final), epoch (if not given), and pip_spec — the +# argument handed to `pip download` to populate the wheels dir. -# Throwaway workdir, cleaned on exit. work=$(mktemp -d) trap 'rm -rf "$work"' EXIT +if [ "$mode" = pypi ]; then + [ -n "$name" ] || die "pypi mode: --name is required" + # Pre-built wheels are downloaded as-is (deterministic). + pip_action=download + pip_spec="$name==$ver" + + if [ -z "$epoch" ]; then + meta=$(curl -fsSL "https://pypi.org/pypi/$name/$ver/json") \ + || die "cannot fetch PyPI metadata for $name $ver" + iso=$(printf '%s' "$meta" \ + | jq -r '[.urls[].upload_time_iso_8601] | sort | .[0] // empty') + [ -n "$iso" ] || die "no upload time found for $name $ver on PyPI" + epoch=$(date -u -d "$iso" +%s) + echo "warning: epoch not given; using PyPI upload time $iso (epoch $epoch)" >&2 + fi +else + # gh mode + [ -n "$repo" ] || die "gh mode: --repo OWNER/REPO is required" + [ -n "$name" ] || name=$(printf '%s' "${repo##*/}" | tr '[:upper:]' '[:lower:]') + [ -n "$tag" ] || tag="$ver" + tag=$(strip_v "$tag") + + # Resolve the real ref: try the bare tag, then v. 
+ ref=""; rel="" + for cand in "$tag" "v$tag"; do + if rel=$(curl -fsSL \ + "https://api.github.com/repos/$repo/releases/tags/$cand" 2>/dev/null); then + ref=$cand + break + fi + done + [ -n "$ref" ] || die "no GitHub release found for $repo tag $tag (or v$tag)" + + if [ -z "$epoch" ]; then + iso=$(printf '%s' "$rel" | jq -r '.published_at // empty') + [ -n "$iso" ] || die "no published_at for $repo release $ref" + epoch=$(date -u -d "$iso" +%s) + echo "warning: epoch not given; using GitHub published_at $iso (epoch $epoch)" >&2 + fi + + # Download + unpack the tagged source. + src_tgz="$work/src.tar.gz" + curl -fsSL "https://github.com/$repo/archive/refs/tags/$ref.tar.gz" \ + -o "$src_tgz" || die "cannot fetch source tarball for $repo $ref" + mkdir -p "$work/src" + tar -xzf "$src_tgz" -C "$work/src" + # GitHub archives unpack to a single top-level dir. + src_dir=$(find "$work/src" -mindepth 1 -maxdepth 1 -type d | head -1) + [ -n "$src_dir" ] || die "unexpected source archive layout" + # A local source dir must be *built* to a wheel; `pip download ` only + # resolves metadata and leaves the project itself unmaterialized. `pip + # wheel` builds the project and all its deps into the wheels dir. + pip_action=wheel + pip_spec="$src_dir" +fi + +export SOURCE_DATE_EPOCH="$epoch" +echo "mkwheels: $name $ver -> $OUTPUT/$name-wheels-$ver.tar.gz" + +# ---- shared: resolve tree, emit requirements, pack tarball ---------------- + wheels="$work/wheels" mkdir -p "$wheels" @@ -87,8 +159,13 @@ mkdir -p "$wheels" python3 -m venv "$work/venv" "$work/venv/bin/pip" install --quiet --upgrade pip wheel >/dev/null -# Resolve the full tree into $wheels (sdists are built to wheels). -"$work/venv/bin/pip" download "$pkg==$ver" --dest "$wheels" +# Resolve the full tree into $wheels. pypi: download pre-built wheels (sdists +# are built to wheels). gh: build the local source + deps to wheels. 
+if [ "$pip_action" = download ]; then + "$work/venv/bin/pip" download "$pip_spec" --dest "$wheels" +else + "$work/venv/bin/pip" wheel "$pip_spec" --wheel-dir "$wheels" +fi # Emit a pinned, hashed requirements.txt from the downloaded files. Each # distribution is pinned to its version with a sha256 hash per file. @@ -98,17 +175,17 @@ for f in "$wheels"/*; do base=$(basename "$f") # name-version from the wheel/sdist filename: split on first two '-' fields # wheels: name-version-...; sdists: name-version.tar.gz - name=${base%%-*} + fname=${base%%-*} rest=${base#*-} - version=${rest%%-*} - version=${version%.tar.gz} + fver=${rest%%-*} + fver=${fver%.tar.gz} hash=$(python3 -c "import hashlib,sys;print(hashlib.sha256(open(sys.argv[1],'rb').read()).hexdigest())" "$f") - printf '%s==%s --hash=sha256:%s\n' "$name" "$version" "$hash" >> "$req" + printf '%s==%s --hash=sha256:%s\n' "$fname" "$fver" "$hash" >> "$req" done sort -o "$req" "$req" mkdir -p "$OUTPUT" -tarball="$OUTPUT/$pkg-wheels-$ver.tar.gz" +tarball="$OUTPUT/$name-wheels-$ver.tar.gz" # Reproducible archive: sorted entries, normalized ownership/mtime, gzip -n. # Run from $work so the archive holds a top-level 'wheels/' dir. diff --git a/selftest b/selftest index 126f5e8..87ebb3a 100755 --- a/selftest +++ b/selftest @@ -1,23 +1,38 @@ #!/bin/bash -# selftest — build six twice and assert the wheels tarballs are byte-identical. -# The smallest check that fails if the reproducible-tar normalization breaks. +# selftest — build twice and assert the wheels tarballs are byte-identical, for +# both modes. The smallest check that fails if the reproducible-tar +# normalization (or either mode's resolution) breaks. set -eu here=$(cd "$(dirname "$0")" && pwd) tmp=$(mktemp -d) trap 'rm -rf "$tmp"' EXIT -# Fixed epoch so both runs use the same mtime (we are testing tar determinism, -# not epoch derivation). 
-OUTPUT="$tmp/a" "$here/mkwheels" six 1.16.0 1620000000 >/dev/null -OUTPUT="$tmp/b" "$here/mkwheels" six 1.16.0 1620000000 >/dev/null +# Fixed epoch so both runs use the same mtime (we test tar determinism, not +# epoch derivation). +epoch=1620000000 +fail=0 -a=$(md5sum "$tmp/a/six-wheels-1.16.0.tar.gz" | cut -d' ' -f1) -b=$(md5sum "$tmp/b/six-wheels-1.16.0.tar.gz" | cut -d' ' -f1) +check() { + local label=$1 file=$2; shift 2 + OUTPUT="$tmp/a" "$here/mkwheels" "$@" --epoch "$epoch" >/dev/null + OUTPUT="$tmp/b" "$here/mkwheels" "$@" --epoch "$epoch" >/dev/null + local a b + a=$(md5sum "$tmp/a/$file" | cut -d' ' -f1) + b=$(md5sum "$tmp/b/$file" | cut -d' ' -f1) + if [ "$a" = "$b" ]; then + echo "PASS: $label reproducible ($a)" + else + echo "FAIL: $label tarballs differ ($a != $b)" >&2 + fail=1 + fi +} -if [ "$a" = "$b" ]; then - echo "PASS: reproducible ($a)" -else - echo "FAIL: tarballs differ ($a != $b)" >&2 - exit 1 -fi +# pypi mode: six from PyPI. +check pypi six-wheels-1.16.0.tar.gz pypi --name six --ver 1.16.0 + +# gh mode: pyparsing from its GitHub source release (pure-python, no runtime +# deps -> fast). Its tags have no 'v' prefix, exercising the bare-tag path. +check gh pyparsing-wheels-3.3.2.tar.gz gh --repo pyparsing/pyparsing --ver 3.3.2 + +exit "$fail" -- cgit v1.2.3