feat: add Vulkan GPU backend and fix GpuLayerCount config
- Downgrade LLamaSharp packages to 0.25.0 to match Vulkan backend availability
- Add LLamaSharp.Backend.Vulkan for AMD/Intel/NVIDIA GPU acceleration
- Fix _gpuLayers bug: was reading LlamaCppTimeout instead of a dedicated field
- Add GpuLayerCount to JournalConfig, sourced from JOURNAL_GPU_LAYERS env var
- Document AI/LLM notes in README (version pinning, known vulkaninfo issue)

Co-Authored-By: Oz <oz-agent@warp.dev>
This commit is contained in:
parent
b4fa65c881
commit
27cc379eb8
@ -10,7 +10,8 @@
|
|||||||
<PackageVersion Include="NAudio" Version="2.2.1" />
|
<PackageVersion Include="NAudio" Version="2.2.1" />
|
||||||
<PackageVersion Include="Whisper.net" Version="1.9.0" />
|
<PackageVersion Include="Whisper.net" Version="1.9.0" />
|
||||||
<PackageVersion Include="Whisper.net.Runtime" Version="1.9.0" />
|
<PackageVersion Include="Whisper.net.Runtime" Version="1.9.0" />
|
||||||
<PackageVersion Include="LLamaSharp" Version="0.26.0" />
|
<PackageVersion Include="LLamaSharp" Version="0.25.0" />
|
||||||
<PackageVersion Include="LLamaSharp.Backend.Cpu" Version="0.26.0" />
|
<PackageVersion Include="LLamaSharp.Backend.Cpu" Version="0.25.0" />
|
||||||
|
<PackageVersion Include="LLamaSharp.Backend.Vulkan" Version="0.25.0" />
|
||||||
</ItemGroup>
|
</ItemGroup>
|
||||||
</Project>
|
</Project>
|
||||||
@ -3,6 +3,7 @@
|
|||||||
<ItemGroup>
|
<ItemGroup>
|
||||||
<PackageReference Include="LLamaSharp" />
|
<PackageReference Include="LLamaSharp" />
|
||||||
<PackageReference Include="LLamaSharp.Backend.Cpu" />
|
<PackageReference Include="LLamaSharp.Backend.Cpu" />
|
||||||
|
<PackageReference Include="LLamaSharp.Backend.Vulkan" />
|
||||||
</ItemGroup>
|
</ItemGroup>
|
||||||
|
|
||||||
<ItemGroup>
|
<ItemGroup>
|
||||||
|
|||||||
@ -18,7 +18,7 @@ public sealed partial class LlamaSharpAiService(JournalConfig config) : IAiServi
|
|||||||
|
|
||||||
private readonly string _configuredModelPath = config.GgufModelPath;
|
private readonly string _configuredModelPath = config.GgufModelPath;
|
||||||
private readonly uint _contextSize = (uint)Math.Clamp(config.ModelContextTokens, 512, 4096);
|
private readonly uint _contextSize = (uint)Math.Clamp(config.ModelContextTokens, 512, 4096);
|
||||||
private readonly int _gpuLayers = config.LlamaCppTimeout;
|
private readonly int _gpuLayers = config.GpuLayerCount;
|
||||||
|
|
||||||
private readonly Lock _sync = new();
|
private readonly Lock _sync = new();
|
||||||
private string? _resolvedModelPath;
|
private string? _resolvedModelPath;
|
||||||
|
|||||||
@ -13,6 +13,7 @@ public sealed record JournalConfig(
|
|||||||
string LlamaCppUrl,
|
string LlamaCppUrl,
|
||||||
string LlamaCppModel,
|
string LlamaCppModel,
|
||||||
int LlamaCppTimeout,
|
int LlamaCppTimeout,
|
||||||
|
int GpuLayerCount,
|
||||||
string EmbeddingApiUrl,
|
string EmbeddingApiUrl,
|
||||||
string EmbeddingModelName,
|
string EmbeddingModelName,
|
||||||
int ModelContextTokens,
|
int ModelContextTokens,
|
||||||
|
|||||||
@ -38,6 +38,7 @@ public sealed class JournalConfigService : IJournalConfigService
|
|||||||
LlamaCppUrl: Environment.GetEnvironmentVariable("LLAMA_CPP_URL") ?? "http://127.0.0.1:8085/v1/completions",
|
LlamaCppUrl: Environment.GetEnvironmentVariable("LLAMA_CPP_URL") ?? "http://127.0.0.1:8085/v1/completions",
|
||||||
LlamaCppModel: Environment.GetEnvironmentVariable("LLAMA_CPP_MODEL") ?? "qwen/qwen3-4b",
|
LlamaCppModel: Environment.GetEnvironmentVariable("LLAMA_CPP_MODEL") ?? "qwen/qwen3-4b",
|
||||||
LlamaCppTimeout: ParseInt("LLAMA_CPP_TIMEOUT", 6000),
|
LlamaCppTimeout: ParseInt("LLAMA_CPP_TIMEOUT", 6000),
|
||||||
|
GpuLayerCount: ParseInt("JOURNAL_GPU_LAYERS", -1),
|
||||||
EmbeddingApiUrl: Environment.GetEnvironmentVariable("EMBEDDING_API_URL") ?? "http://127.0.0.1:8086/v1/embeddings",
|
EmbeddingApiUrl: Environment.GetEnvironmentVariable("EMBEDDING_API_URL") ?? "http://127.0.0.1:8086/v1/embeddings",
|
||||||
EmbeddingModelName: Environment.GetEnvironmentVariable("EMBEDDING_MODEL_NAME") ?? "text-embedding-nomic-embed-text-v2-moe",
|
EmbeddingModelName: Environment.GetEnvironmentVariable("EMBEDDING_MODEL_NAME") ?? "text-embedding-nomic-embed-text-v2-moe",
|
||||||
ModelContextTokens: ParseInt("MODEL_CONTEXT_TOKENS", 131072),
|
ModelContextTokens: ParseInt("MODEL_CONTEXT_TOKENS", 131072),
|
||||||
|
|||||||
15
README.md
15
README.md
@ -165,7 +165,7 @@ dotnet run --project Journal.SmokeTests
|
|||||||
NuGet package versions are managed centrally in `Directory.Packages.props`. Project-level `.csproj` files reference packages without version numbers.
|
NuGet package versions are managed centrally in `Directory.Packages.props`. Project-level `.csproj` files reference packages without version numbers.
|
||||||
|
|
||||||
- `Journal.Core` — `Microsoft.Data.Sqlite.Core`, `SQLitePCLRaw.bundle_e_sqlcipher`, `Microsoft.Extensions.DependencyInjection.Abstractions`
|
- `Journal.Core` — `Microsoft.Data.Sqlite.Core`, `SQLitePCLRaw.bundle_e_sqlcipher`, `Microsoft.Extensions.DependencyInjection.Abstractions`
|
||||||
- `Journal.AI` — `LLamaSharp`, `LLamaSharp.Backend.Cpu` + references `Journal.Core`
|
- `Journal.AI` — `LLamaSharp`, `LLamaSharp.Backend.Cpu`, `LLamaSharp.Backend.Vulkan` + references `Journal.Core`
|
||||||
- `Journal.Sidecar` — `Microsoft.Extensions.DependencyInjection`, `NAudio`, `Whisper.net` + references `Journal.Core`, `Journal.AI`
|
- `Journal.Sidecar` — `Microsoft.Extensions.DependencyInjection`, `NAudio`, `Whisper.net` + references `Journal.Core`, `Journal.AI`
|
||||||
- `Journal.WebGateway` — `Microsoft.NET.Sdk.Web` + references `Journal.Core`, `Journal.AI`
|
- `Journal.WebGateway` — `Microsoft.NET.Sdk.Web` + references `Journal.Core`, `Journal.AI`
|
||||||
- `Journal.SmokeTests` — references `Journal.Core`
|
- `Journal.SmokeTests` — references `Journal.Core`
|
||||||
@ -187,6 +187,7 @@ NuGet package versions are managed centrally in `Directory.Packages.props`. Proj
|
|||||||
| `JOURNAL_VAULT_DIR` | `<root>/journal/vault` | Override vault directory path |
|
| `JOURNAL_VAULT_DIR` | `<root>/journal/vault` | Override vault directory path |
|
||||||
| `JOURNAL_DATA_DIR` | _(empty)_ | Override decrypted data directory path |
|
| `JOURNAL_DATA_DIR` | _(empty)_ | Override decrypted data directory path |
|
||||||
| `JOURNAL_AI_PROVIDER` | `none` | AI provider mode (`none`, `llamasharp`) |
|
| `JOURNAL_AI_PROVIDER` | `none` | AI provider mode (`none`, `llamasharp`) |
|
||||||
|
| `JOURNAL_GPU_LAYERS` | `-1` (all) | Number of model layers to offload to GPU (`-1` = all, `0` = CPU only) |
|
||||||
| `JOURNAL_LOG_LEVEL` | `warning` | Log verbosity (`trace`, `debug`, `information`, `warning`, `error`, `critical`) |
|
| `JOURNAL_LOG_LEVEL` | `warning` | Log verbosity (`trace`, `debug`, `information`, `warning`, `error`, `critical`) |
|
||||||
| `JOURNAL_WEB_DIST` | auto | Override web UI dist path for WebGateway |
|
| `JOURNAL_WEB_DIST` | auto | Override web UI dist path for WebGateway |
|
||||||
|
|
||||||
@ -199,6 +200,18 @@ NuGet package versions are managed centrally in `Directory.Packages.props`. Proj
|
|||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
|
## AI / LLM Notes
|
||||||
|
|
||||||
|
The `Journal.AI` project uses **LLamaSharp** for local LLM inference.
|
||||||
|
|
||||||
|
- **CPU backend** (`LLamaSharp.Backend.Cpu`) is always installed as a fallback.
|
||||||
|
- **Vulkan backend** (`LLamaSharp.Backend.Vulkan`) provides GPU acceleration for AMD, Intel, and NVIDIA GPUs. LLamaSharp picks the best available backend at runtime.
|
||||||
|
- The core `LLamaSharp` package and all backend packages must share the **same version**. They are currently pinned to **0.25.0** because no 0.26.0 release of `LLamaSharp.Backend.Vulkan` has been published yet. Watch the [NuGet page](https://www.nuget.org/packages/LLamaSharp.Backend.Vulkan) and upgrade all three packages together when a new version ships.
|
||||||
|
- **Known issue**: on some machines the Vulkan backend falls back to CPU because the internal `vulkaninfo --summary` detection times out after 1 second. If you see CPU-only inference despite having a Vulkan-capable GPU, this timeout is the likely cause. The LLamaSharp team has acknowledged the issue ([#930](https://github.com/SciSharp/LLamaSharp/issues/930)).
|
||||||
|
- Set `JOURNAL_GPU_LAYERS=-1` (the default) to offload all model layers to the GPU, or `0` to force CPU-only.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
## Journal.WebGateway
|
## Journal.WebGateway
|
||||||
|
|
||||||
An ASP.NET Core minimal API that wraps `Journal.Core` for browser use.
|
An ASP.NET Core minimal API that wraps `Journal.Core` for browser use.
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user