about summary refs log tree commit diff stats
path: root/llama.cpp-vulkan
diff options
context:
space:
mode:
Diffstat (limited to 'llama.cpp-vulkan')
-rw-r--r--llama.cpp-vulkan/README22
-rw-r--r--llama.cpp-vulkan/doinst.sh27
-rw-r--r--llama.cpp-vulkan/llama.cpp-vulkan.SlackBuild131
-rw-r--r--llama.cpp-vulkan/llama.cpp-vulkan.info10
-rw-r--r--llama.cpp-vulkan/rc.llama.cpp95
-rw-r--r--llama.cpp-vulkan/slack-desc19
6 files changed, 304 insertions, 0 deletions
diff --git a/llama.cpp-vulkan/README b/llama.cpp-vulkan/README
new file mode 100644
index 0000000..5509d44
--- /dev/null
+++ b/llama.cpp-vulkan/README
@@ -0,0 +1,22 @@
+llama.cpp
+
+LLM inference in C/C++
+
+The main goal of llama.cpp is to enable LLM inference with minimal
+setup and state-of-the-art performance on a wide range of hardware
+locally and in the cloud.
+
+ - Plain C/C++ implementation without any dependencies
+ - Apple silicon is a first-class citizen - optimized via ARM NEON,
+ Accelerate and Metal frameworks
+ - AVX, AVX2, AVX512 and AMX support for x86 architectures
+ - RVV, ZVFH, ZFH, ZICBOP and ZIHINTPAUSE support for RISC-V
+ architectures
+ - 1.5-bit, 2-bit, 3-bit, 4-bit, 5-bit, 6-bit, and 8-bit integer
+ quantization for faster inference and reduced memory use
+ - Custom CUDA kernels for running LLMs on NVIDIA GPUs (support for
+ AMD GPUs via HIP and Moore Threads GPUs via MUSA)
+ - Vulkan and SYCL backend support
+ - CPU+GPU hybrid inference to partially accelerate models larger than
+ the total VRAM capacity
+
diff --git a/llama.cpp-vulkan/doinst.sh b/llama.cpp-vulkan/doinst.sh
new file mode 100644
index 0000000..0b91476
--- /dev/null
+++ b/llama.cpp-vulkan/doinst.sh
@@ -0,0 +1,27 @@
+# Standard Slackware doinst.sh helper: promote a freshly installed FILE.new
+# to FILE without clobbering a locally modified copy the admin may have.
+config() {
+ NEW="$1"
+ OLD="$(dirname $NEW)/$(basename $NEW .new)"
+ # If there's no config file by that name, mv it over:
+ if [ ! -r $OLD ]; then
+ mv $NEW $OLD
+ elif [ "$(cat $OLD | md5sum)" = "$(cat $NEW | md5sum)" ]; then
+ # toss the redundant copy
+ rm $NEW
+ fi
+ # Otherwise, we leave the .new copy for the admin to consider...
+}
+
+# Clone the ownership/permissions of an existing FILE onto the incoming
+# FILE.new (keeping the .new content), then hand off to config() above.
+preserve_perms() {
+ NEW="$1"
+ OLD="$(dirname $NEW)/$(basename $NEW .new)"
+ if [ -e $OLD ]; then
+ # cp -a copies OLD's mode/owner; overwriting the content afterwards
+ # keeps those permission bits on the new file.
+ cp -a $OLD ${NEW}.incoming
+ cat $NEW > ${NEW}.incoming
+ mv ${NEW}.incoming $NEW
+ fi
+ config $NEW
+}
+
+preserve_perms etc/rc.d/rc.llama.cpp.new
+config etc/rc.d/rc.llama.cpp.new
+
diff --git a/llama.cpp-vulkan/llama.cpp-vulkan.SlackBuild b/llama.cpp-vulkan/llama.cpp-vulkan.SlackBuild
new file mode 100644
index 0000000..dbbd4cf
--- /dev/null
+++ b/llama.cpp-vulkan/llama.cpp-vulkan.SlackBuild
@@ -0,0 +1,131 @@
+#!/bin/bash
+
+# Slackware build script for llama.cpp-vulkan
+
+# Copyright 2026 danix <danix@danix.xyz>
+# All rights reserved.
+#
+# Redistribution and use of this script, with or without modification, is
+# permitted provided that the following conditions are met:
+#
+# 1. Redistributions of this script must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+#
+# THIS SOFTWARE IS PROVIDED BY THE AUTHOR "AS IS" AND ANY EXPRESS OR IMPLIED
+# WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+# MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
+# EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
+# OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+# WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
+# OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
+# ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+# Run relative to the script's own directory; CWD is used for local files.
+cd $(dirname $0) ; CWD=$(pwd)
+
+PRGNAM=llama.cpp-vulkan
+# Upstream source tree is named llama.cpp, not llama.cpp-vulkan.
+SRCNAM=llama.cpp
+VERSION=${VERSION:-b8611}
+BUILD=${BUILD:-1}
+TAG=${TAG:-_SBo}
+PKGTYPE=${PKGTYPE:-tgz}
+
+# Autodetect ARCH unless the caller exported one.
+if [ -z "$ARCH" ]; then
+ case "$( uname -m )" in
+ i?86) ARCH=i586 ;;
+ arm*) ARCH=arm ;;
+ *) ARCH=$( uname -m ) ;;
+ esac
+fi
+
+# SBo tooling hook: print the package filename and exit without building.
+if [ ! -z "${PRINT_PACKAGE_NAME}" ]; then
+ echo "$PRGNAM-$VERSION-$ARCH-$BUILD$TAG.$PKGTYPE"
+ exit 0
+fi
+
+TMP=${TMP:-/tmp/SBo}
+PKG=$TMP/package-$PRGNAM
+OUTPUT=${OUTPUT:-/tmp}
+
+# Per-arch compiler flags and library directory suffix (lib vs lib64).
+if [ "$ARCH" = "i586" ]; then
+ SLKCFLAGS="-O2 -march=i586 -mtune=i686"
+ LIBDIRSUFFIX=""
+elif [ "$ARCH" = "i686" ]; then
+ SLKCFLAGS="-O2 -march=i686 -mtune=i686"
+ LIBDIRSUFFIX=""
+elif [ "$ARCH" = "x86_64" ]; then
+ SLKCFLAGS="-O2 -fPIC"
+ LIBDIRSUFFIX="64"
+else
+ SLKCFLAGS="-O2"
+ LIBDIRSUFFIX=""
+fi
+
+# Abort on any error from here on, and trace commands for the build log.
+set -ex
+
+rm -rf $PKG
+mkdir -p $TMP $PKG $OUTPUT
+cd $TMP
+rm -rf $SRCNAM-$VERSION
+tar xvf $CWD/$SRCNAM-$VERSION.tar.gz
+cd $SRCNAM-$VERSION
+chown -R root:root .
+find -L . \
+ \( -perm 777 -o -perm 775 -o -perm 750 -o -perm 711 -o -perm 555 \
+ -o -perm 511 \) -exec chmod 755 {} \; -o \
+ \( -perm 666 -o -perm 664 -o -perm 640 -o -perm 600 -o -perm 444 \
+ -o -perm 440 -o -perm 400 \) -exec chmod 644 {} \;
+
+mkdir -p build
+cd build
+ cmake .. \
+ -DCMAKE_C_FLAGS:STRING="$SLKCFLAGS" \
+ -DCMAKE_CXX_FLAGS:STRING="$SLKCFLAGS" \
+ -DCMAKE_INSTALL_PREFIX=/usr \
+ -DLIB_SUFFIX=${LIBDIRSUFFIX} \
+ -DMAN_INSTALL_DIR=/usr/man \
+ -B build \
+ -DCMAKE_BUILD_TYPE=Release \
+ -DBUILD_SHARED_LIBS=ON \
+ -DLLAMA_BUILD_TESTS=OFF \
+ -DLLAMA_USE_SYSTEM_GGML=OFF \
+ -DGGML_NATIVE=ON \
+ -DGGML_ALL_WARNINGS=OFF \
+ -DGGML_ALL_WARNINGS_3RD_PARTY=OFF \
+ -DGGML_BUILD_EXAMPLES=OFF \
+ -DGGML_BUILD_TESTS=OFF \
+ -DGGML_LTO=ON \
+ -DGGML_RPC=ON \
+ -DGGML_VULKAN=ON \
+ -DGGML_CUDA_FA_ALL_QUANTS=ON \
+ -DLLAMA_BUILD_NUMBER="${VERSION#b}" \
+ -Wno-dev
+
+ cmake --build build --
+ DESTDIR=$PKG cmake --install build
+cd ..
+
+# Drop libtool archives, if any were installed.
+rm -f $PKG/{,usr/}lib${LIBDIRSUFFIX}/*.la
+
+# Strip ELF binaries and shared objects; ignore non-ELF files.
+find $PKG -print0 | xargs -0 file | grep -e "executable" -e "shared object" | grep ELF \
+ | cut -f 1 -d : | xargs strip --strip-unneeded 2> /dev/null || true
+
+# Compress man pages and re-point their symlinks at the .gz targets.
+find $PKG/usr/man -type f -exec gzip -9 {} \; || true
+for i in $( find $PKG/usr/man -type l ) ; do ln -s $( readlink $i ).gz $i.gz ; rm $i ; done || true
+
+mkdir -p $PKG/usr/doc/$PRGNAM-$VERSION
+cp -a \
+ AUTHORS CODEOWNERS CONTRIBUTING.md LICENSE README.md SECURITY.md media \
+ $PKG/usr/doc/$PRGNAM-$VERSION
+cat $CWD/$PRGNAM.SlackBuild > $PKG/usr/doc/$PRGNAM-$VERSION/$PRGNAM.SlackBuild
+
+# Ship the rc script as .new so doinst.sh can merge it (see doinst.sh).
+mkdir -p $PKG/etc/rc.d
+cat $CWD/rc.llama.cpp > $PKG/etc/rc.d/rc.llama.cpp.new
+
+mkdir -p $PKG/install
+cat $CWD/slack-desc > $PKG/install/slack-desc
+cat $CWD/doinst.sh > $PKG/install/doinst.sh
+
+cd $PKG
+/sbin/makepkg -l y -c n $OUTPUT/$PRGNAM-$VERSION-$ARCH-$BUILD$TAG.$PKGTYPE
diff --git a/llama.cpp-vulkan/llama.cpp-vulkan.info b/llama.cpp-vulkan/llama.cpp-vulkan.info
new file mode 100644
index 0000000..688e652
--- /dev/null
+++ b/llama.cpp-vulkan/llama.cpp-vulkan.info
@@ -0,0 +1,10 @@
+PRGNAM="llama.cpp-vulkan"
+VERSION="b8611"
+HOMEPAGE="https://github.com/ggml-org/llama.cpp"
+DOWNLOAD="https://github.com/ggml-org/llama.cpp/archive/b8611/llama.cpp-b8611.tar.gz"
+MD5SUM="1850929860dd3dc9df7ec4aea2fd156a"
+DOWNLOAD_x86_64=""
+MD5SUM_x86_64=""
+REQUIRES=""
+MAINTAINER="danix"
+EMAIL="danix@danix.xyz"
diff --git a/llama.cpp-vulkan/rc.llama.cpp b/llama.cpp-vulkan/rc.llama.cpp
new file mode 100644
index 0000000..82124f9
--- /dev/null
+++ b/llama.cpp-vulkan/rc.llama.cpp
@@ -0,0 +1,95 @@
+#!/bin/sh
+
+# Copyright 2026 danix <danix@danix.xyz>
+# All rights reserved.
+#
+# Redistribution and use of this script, with or without modification, is
+# permitted provided that the following conditions are met:
+#
+# 1. Redistributions of this script must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+#
+# THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED
+# WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+# MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
+# EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
+# OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+# WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
+# OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
+# ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+# SERVER OPTIONS
+# many options can be set as env variables.
+# See https://github.com/ggml-org/llama.cpp/tree/master/tools/server
+
+RUNAS_USER=
+RUNAS_GROUP=users
+LLSRV=/usr/bin/llama-server
+#MODEL_DIR=
+MODEL= # which model to use
+HOST=127.0.0.1 # ip to bind the server to
+PORT=8181 # port for the server to listen to
+CONTEXT= #size of context
+TEMP=1.0
+TOP_K=20
+TOP_P=0.95
+MIN_P=0.00
+SLEEP_IDLE_SECONDS=500 # after how many seconds to unload the model from memory
+REASONING_FORMAT=deepseek
+GPU_LAYERS=all
+SPLIT_MODE=none
+NO_CONTEXT_SHIFT='--no-context-shift'
+LOG_FILE=/var/log/llama-server/server.log
+LLAMA_ARGS="--ctx-size $CONTEXT \
+ --jinja \
+ --temp $TEMP \
+ --top-k $TOP_K \
+ --top-p $TOP_P \
+ --min-p $MIN_P \
+ --host $HOST --port $PORT \
+ --reasoning-format $REASONING_FORMAT \
+ -sm $SPLIT_MODE \
+ $NO_CONTEXT_SHIFT \
+ -ngl $GPU_LAYERS \
+ --sleep-idle-seconds $SLEEP_IDLE_SECONDS \
+ -m $MODEL" # alternatively one could set the model directory and pass --models-dir to the script
+
+# Dispatch on the rc action: start | stop | restart | status.
+case "$1" in
+ stop)
+ if /usr/bin/pgrep -f "$LLSRV" >/dev/null; then
+ echo "Stopping llama-server..."
+ killall llama-server 2>/dev/null
+ else
+ echo "llama-server is not running..."
+ exit 1
+ fi
+ ;;
+ start)
+ echo "Starting llama-server..."
+ # Create the log directory when it is MISSING.  The original test was
+ # inverted ('if [[ -d ... ]]'): mkdir/touch/chown only ran when the
+ # directory already existed, so a fresh install never got its log dir
+ # and --log-file failed.  Plain [ ] also keeps this valid under the
+ # #!/bin/sh shebang, where [[ ]] is a bashism.
+ if [ ! -d "$(dirname $LOG_FILE)" ]; then
+ mkdir -p $(dirname $LOG_FILE)
+ fi
+ touch $LOG_FILE
+ chown -R ${RUNAS_USER}:${RUNAS_GROUP} $(dirname $LOG_FILE)
+ su $RUNAS_USER -c "$LLSRV $LLAMA_ARGS --log-file $LOG_FILE -lv 3 --log-timestamps" 1>/dev/null 2>&1 &
+ $0 status
+ ;;
+ restart)
+ $0 stop
+ sleep 1
+ $0 start
+ ;;
+ status)
+ if /usr/bin/pgrep -f "$LLSRV" >/dev/null; then
+ echo "llama-server is running"
+ echo "Browser interface is at:"
+ echo "http://${HOST}:${PORT}"
+ else
+ # Previously printed nothing at all when the server was down.
+ echo "llama-server is not running"
+ fi
+ ;;
+ *)
+ echo "usage: $0 { start | stop | status | restart }" >&2
+ exit 1
+ ;;
+esac
diff --git a/llama.cpp-vulkan/slack-desc b/llama.cpp-vulkan/slack-desc
new file mode 100644
index 0000000..273e15e
--- /dev/null
+++ b/llama.cpp-vulkan/slack-desc
@@ -0,0 +1,19 @@
+# HOW TO EDIT THIS FILE:
+# The "handy ruler" below makes it easier to edit a package description.
+# Line up the first '|' above the ':' following the base package name, and
+# the '|' on the right side marks the last column you can put a character in.
+# You must make exactly 11 lines for the formatting to be correct. It's also
+# customary to leave one space after the ':' except on otherwise blank lines.
+
+ |-----handy-ruler------------------------------------------------------|
+llama.cpp-vulkan: llama.cpp-vulkan (LLM inference in C/C++)
+llama.cpp-vulkan:
+llama.cpp-vulkan: Port of Facebook's LLaMA model in C/C++ with Vulkan GPU optimizations
+llama.cpp-vulkan:
+llama.cpp-vulkan: The main goal of llama.cpp is to enable LLM inference with minimal
+llama.cpp-vulkan: setup and state-of-the-art performance on a wide range of hardware
+llama.cpp-vulkan: locally and in the cloud.
+llama.cpp-vulkan:
+llama.cpp-vulkan: Home: https://github.com/ggml-org/llama.cpp
+llama.cpp-vulkan:
+llama.cpp-vulkan: