From 27cc379eb8c8d76d2977795669632fc8f66d805f Mon Sep 17 00:00:00 2001 From: Jacob Schmidt Date: Mon, 2 Mar 2026 21:21:22 -0600 Subject: [PATCH] feat: add Vulkan GPU backend and fix GpuLayerCount config - Downgrade LLamaSharp packages to 0.25.0 to match Vulkan backend availability - Add LLamaSharp.Backend.Vulkan for AMD/Intel/NVIDIA GPU acceleration - Fix _gpuLayers bug: was reading LlamaCppTimeout instead of a dedicated field - Add GpuLayerCount to JournalConfig, sourced from JOURNAL_GPU_LAYERS env var - Document AI/LLM notes in README (version pinning, known vulkaninfo issue) Co-Authored-By: Oz --- Directory.Packages.props | 5 +++-- Journal.AI/Journal.AI.csproj | 1 + Journal.AI/LlamaSharpAiService.cs | 2 +- Journal.Core/Models/JournalConfig.cs | 1 + .../Services/Config/JournalConfigService.cs | 1 + README.md | 15 ++++++++++++++- 6 files changed, 21 insertions(+), 4 deletions(-) diff --git a/Directory.Packages.props b/Directory.Packages.props index 9845347..98d77ab 100644 --- a/Directory.Packages.props +++ b/Directory.Packages.props @@ -10,7 +10,8 @@ - - + + + \ No newline at end of file diff --git a/Journal.AI/Journal.AI.csproj b/Journal.AI/Journal.AI.csproj index 32ff915..bf333ad 100644 --- a/Journal.AI/Journal.AI.csproj +++ b/Journal.AI/Journal.AI.csproj @@ -3,6 +3,7 @@ + diff --git a/Journal.AI/LlamaSharpAiService.cs b/Journal.AI/LlamaSharpAiService.cs index 0844659..7a5e58c 100644 --- a/Journal.AI/LlamaSharpAiService.cs +++ b/Journal.AI/LlamaSharpAiService.cs @@ -18,7 +18,7 @@ public sealed partial class LlamaSharpAiService(JournalConfig config) : IAiServi private readonly string _configuredModelPath = config.GgufModelPath; private readonly uint _contextSize = (uint)Math.Clamp(config.ModelContextTokens, 512, 4096); - private readonly int _gpuLayers = config.LlamaCppTimeout; + private readonly int _gpuLayers = config.GpuLayerCount; private readonly Lock _sync = new(); private string? _resolvedModelPath; diff --git a/Journal.Core/Models/JournalConfig.cs b/Journal.Core/Models/JournalConfig.cs index 0108f92..2b4afb5 100644 --- a/Journal.Core/Models/JournalConfig.cs +++ b/Journal.Core/Models/JournalConfig.cs @@ -13,6 +13,7 @@ public sealed record JournalConfig( string LlamaCppUrl, string LlamaCppModel, int LlamaCppTimeout, + int GpuLayerCount, string EmbeddingApiUrl, string EmbeddingModelName, int ModelContextTokens, diff --git a/Journal.Core/Services/Config/JournalConfigService.cs b/Journal.Core/Services/Config/JournalConfigService.cs index c73b3b6..fe92abd 100644 --- a/Journal.Core/Services/Config/JournalConfigService.cs +++ b/Journal.Core/Services/Config/JournalConfigService.cs @@ -38,6 +38,7 @@ public sealed class JournalConfigService : IJournalConfigService LlamaCppUrl: Environment.GetEnvironmentVariable("LLAMA_CPP_URL") ?? "http://127.0.0.1:8085/v1/completions", LlamaCppModel: Environment.GetEnvironmentVariable("LLAMA_CPP_MODEL") ?? "qwen/qwen3-4b", LlamaCppTimeout: ParseInt("LLAMA_CPP_TIMEOUT", 6000), + GpuLayerCount: ParseInt("JOURNAL_GPU_LAYERS", -1), EmbeddingApiUrl: Environment.GetEnvironmentVariable("EMBEDDING_API_URL") ?? "http://127.0.0.1:8086/v1/embeddings", EmbeddingModelName: Environment.GetEnvironmentVariable("EMBEDDING_MODEL_NAME") ?? "text-embedding-nomic-embed-text-v2-moe", ModelContextTokens: ParseInt("MODEL_CONTEXT_TOKENS", 131072), diff --git a/README.md b/README.md index 5948489..bd9da34 100644 --- a/README.md +++ b/README.md @@ -165,7 +165,7 @@ dotnet run --project Journal.SmokeTests NuGet package versions are managed centrally in `Directory.Packages.props`. Project-level `.csproj` files reference packages without version numbers. - `Journal.Core` — `Microsoft.Data.Sqlite.Core`, `SQLitePCLRaw.bundle_e_sqlcipher`, `Microsoft.Extensions.DependencyInjection.Abstractions` -- `Journal.AI` — `LLamaSharp`, `LLamaSharp.Backend.Cpu` + references `Journal.Core` +- `Journal.AI` — `LLamaSharp`, `LLamaSharp.Backend.Cpu`, `LLamaSharp.Backend.Vulkan` + references `Journal.Core` - `Journal.Sidecar` — `Microsoft.Extensions.DependencyInjection`, `NAudio`, `Whisper.net` + references `Journal.Core`, `Journal.AI` - `Journal.WebGateway` — `Microsoft.NET.Sdk.Web` + references `Journal.Core`, `Journal.AI` - `Journal.SmokeTests` — references `Journal.Core` @@ -187,6 +187,7 @@ NuGet package versions are managed centrally in `Directory.Packages.props`. Proj | `JOURNAL_VAULT_DIR` | `/journal/vault` | Override vault directory path | | `JOURNAL_DATA_DIR` | _(empty)_ | Override decrypted data directory path | | `JOURNAL_AI_PROVIDER` | `none` | AI provider mode (`none`, `llamasharp`) | +| `JOURNAL_GPU_LAYERS` | `-1` (all) | Number of model layers to offload to GPU (`-1` = all, `0` = CPU only) | | `JOURNAL_LOG_LEVEL` | `warning` | Log verbosity (`trace`, `debug`, `information`, `warning`, `error`, `critical`) | | `JOURNAL_WEB_DIST` | auto | Override web UI dist path for WebGateway | @@ -199,6 +200,18 @@ NuGet package versions are managed centrally in `Directory.Packages.props`. Proj --- +## AI / LLM Notes + +The `Journal.AI` project uses **LLamaSharp** for local LLM inference. + +- **CPU backend** (`LLamaSharp.Backend.Cpu`) is always installed as a fallback. +- **Vulkan backend** (`LLamaSharp.Backend.Vulkan`) provides GPU acceleration for AMD, Intel, and NVIDIA GPUs. LLamaSharp picks the best available backend at runtime. +- All backend packages must share the **same version**. Currently pinned to **0.25.0** because `LLamaSharp.Backend.Vulkan` has not yet published a 0.26.0 release. Watch the [NuGet page](https://www.nuget.org/packages/LLamaSharp.Backend.Vulkan) and upgrade all three packages together when a new version ships. +- **Known issue**: on some machines the Vulkan backend falls back to CPU because the internal `vulkaninfo --summary` detection times out at 1 second. If you see CPU-only inference despite having a Vulkan-capable GPU, this is likely the cause. The LLamaSharp team has acknowledged the issue ([#930](https://github.com/SciSharp/LLamaSharp/issues/930)). +- Set `JOURNAL_GPU_LAYERS=-1` (the default) to offload all model layers to the GPU, or `0` to force CPU-only. + +--- + ## Journal.WebGateway An ASP.NET Core minimal API that wraps `Journal.Core` for browser use.