diff --git a/Directory.Packages.props b/Directory.Packages.props
index 9845347..98d77ab 100644
--- a/Directory.Packages.props
+++ b/Directory.Packages.props
@@ -10,7 +10,8 @@
-
-
+
+
+
\ No newline at end of file
diff --git a/Journal.AI/Journal.AI.csproj b/Journal.AI/Journal.AI.csproj
index 32ff915..bf333ad 100644
--- a/Journal.AI/Journal.AI.csproj
+++ b/Journal.AI/Journal.AI.csproj
@@ -3,6 +3,7 @@
+
diff --git a/Journal.AI/LlamaSharpAiService.cs b/Journal.AI/LlamaSharpAiService.cs
index 0844659..7a5e58c 100644
--- a/Journal.AI/LlamaSharpAiService.cs
+++ b/Journal.AI/LlamaSharpAiService.cs
@@ -18,7 +18,7 @@ public sealed partial class LlamaSharpAiService(JournalConfig config) : IAiServi
private readonly string _configuredModelPath = config.GgufModelPath;
private readonly uint _contextSize = (uint)Math.Clamp(config.ModelContextTokens, 512, 4096);
- private readonly int _gpuLayers = config.LlamaCppTimeout;
+ private readonly int _gpuLayers = config.GpuLayerCount;
private readonly Lock _sync = new();
private string? _resolvedModelPath;
diff --git a/Journal.Core/Models/JournalConfig.cs b/Journal.Core/Models/JournalConfig.cs
index 0108f92..2b4afb5 100644
--- a/Journal.Core/Models/JournalConfig.cs
+++ b/Journal.Core/Models/JournalConfig.cs
@@ -13,6 +13,7 @@ public sealed record JournalConfig(
string LlamaCppUrl,
string LlamaCppModel,
int LlamaCppTimeout,
+ int GpuLayerCount,
string EmbeddingApiUrl,
string EmbeddingModelName,
int ModelContextTokens,
diff --git a/Journal.Core/Services/Config/JournalConfigService.cs b/Journal.Core/Services/Config/JournalConfigService.cs
index c73b3b6..fe92abd 100644
--- a/Journal.Core/Services/Config/JournalConfigService.cs
+++ b/Journal.Core/Services/Config/JournalConfigService.cs
@@ -38,6 +38,7 @@ public sealed class JournalConfigService : IJournalConfigService
LlamaCppUrl: Environment.GetEnvironmentVariable("LLAMA_CPP_URL") ?? "http://127.0.0.1:8085/v1/completions",
LlamaCppModel: Environment.GetEnvironmentVariable("LLAMA_CPP_MODEL") ?? "qwen/qwen3-4b",
LlamaCppTimeout: ParseInt("LLAMA_CPP_TIMEOUT", 6000),
+ GpuLayerCount: ParseInt("JOURNAL_GPU_LAYERS", -1),
EmbeddingApiUrl: Environment.GetEnvironmentVariable("EMBEDDING_API_URL") ?? "http://127.0.0.1:8086/v1/embeddings",
EmbeddingModelName: Environment.GetEnvironmentVariable("EMBEDDING_MODEL_NAME") ?? "text-embedding-nomic-embed-text-v2-moe",
ModelContextTokens: ParseInt("MODEL_CONTEXT_TOKENS", 131072),
diff --git a/README.md b/README.md
index 5948489..bd9da34 100644
--- a/README.md
+++ b/README.md
@@ -165,7 +165,7 @@ dotnet run --project Journal.SmokeTests
NuGet package versions are managed centrally in `Directory.Packages.props`. Project-level `.csproj` files reference packages without version numbers.
- `Journal.Core` — `Microsoft.Data.Sqlite.Core`, `SQLitePCLRaw.bundle_e_sqlcipher`, `Microsoft.Extensions.DependencyInjection.Abstractions`
-- `Journal.AI` — `LLamaSharp`, `LLamaSharp.Backend.Cpu` + references `Journal.Core`
+- `Journal.AI` — `LLamaSharp`, `LLamaSharp.Backend.Cpu`, `LLamaSharp.Backend.Vulkan` + references `Journal.Core`
- `Journal.Sidecar` — `Microsoft.Extensions.DependencyInjection`, `NAudio`, `Whisper.net` + references `Journal.Core`, `Journal.AI`
- `Journal.WebGateway` — `Microsoft.NET.Sdk.Web` + references `Journal.Core`, `Journal.AI`
- `Journal.SmokeTests` — references `Journal.Core`
@@ -187,6 +187,7 @@ NuGet package versions are managed centrally in `Directory.Packages.props`. Proj
| `JOURNAL_VAULT_DIR` | `/journal/vault` | Override vault directory path |
| `JOURNAL_DATA_DIR` | _(empty)_ | Override decrypted data directory path |
| `JOURNAL_AI_PROVIDER` | `none` | AI provider mode (`none`, `llamasharp`) |
+| `JOURNAL_GPU_LAYERS` | `-1` (all) | Number of model layers to offload to GPU (`-1` = all, `0` = CPU only) |
| `JOURNAL_LOG_LEVEL` | `warning` | Log verbosity (`trace`, `debug`, `information`, `warning`, `error`, `critical`) |
| `JOURNAL_WEB_DIST` | auto | Override web UI dist path for WebGateway |
@@ -199,6 +200,18 @@ NuGet package versions are managed centrally in `Directory.Packages.props`. Proj
---
+## AI / LLM Notes
+
+The `Journal.AI` project uses **LLamaSharp** for local LLM inference.
+
+- **CPU backend** (`LLamaSharp.Backend.Cpu`) is always installed as a fallback.
+- **Vulkan backend** (`LLamaSharp.Backend.Vulkan`) provides GPU acceleration for AMD, Intel, and NVIDIA GPUs. LLamaSharp picks the best available backend at runtime.
+- The core `LLamaSharp` package and every backend package must share the **same version**. They are currently pinned to **0.25.0** because `LLamaSharp.Backend.Vulkan` has not yet published a 0.26.0 release. Watch the [NuGet page](https://www.nuget.org/packages/LLamaSharp.Backend.Vulkan) and upgrade all three packages (`LLamaSharp`, `LLamaSharp.Backend.Cpu`, `LLamaSharp.Backend.Vulkan`) together when a new version ships.
+- **Known issue**: on some machines LLamaSharp falls back to the CPU backend because its internal `vulkaninfo --summary` detection call times out after 1 second. If you see CPU-only inference despite having a Vulkan-capable GPU, this is likely the cause. The LLamaSharp team has acknowledged the issue ([#930](https://github.com/SciSharp/LLamaSharp/issues/930)).
+- Set `JOURNAL_GPU_LAYERS=-1` (the default) to offload all model layers to the GPU, `0` to force CPU-only inference, or a positive number to offload only that many layers.
+
+---
+
## Journal.WebGateway
An ASP.NET Core minimal API that wraps `Journal.Core` for browser use.