Integrate local S2T into sidecar and stabilize live dictation

This commit is contained in:
Jacob Schmidt 2026-02-28 23:35:58 -06:00
parent aa197230a6
commit 14c2c4bceb
21 changed files with 1544 additions and 60 deletions

View File

@ -18,7 +18,8 @@
"dependencies": {
"@tauri-apps/api": "^2",
"@tauri-apps/plugin-dialog": "^2.6.0",
"@tauri-apps/plugin-opener": "^2"
"@tauri-apps/plugin-opener": "^2",
"tauri-plugin-mic-recorder-api": "^2.0.0"
},
"devDependencies": {
"@sveltejs/adapter-static": "^3.0.6",

View File

@ -41,47 +41,57 @@ function sidecarFileName() {
: "Journal.Sidecar";
}
/**
 * Runs `dotnet publish` for one project as a self-contained, single-file
 * Release build targeting `runtime`, writing the output to `publishOutputDir`.
 *
 * Throws if the `dotnet` process could not be spawned. If `dotnet` exits with
 * a non-zero status, this script exits with that status (or 1 when no status
 * is reported).
 *
 * @param {string} projectPath Project passed to `dotnet publish`.
 * @param {string} runtime Runtime identifier passed via `-r`.
 */
function publishProject(projectPath, runtime) {
  const buildProperties = [
    "-p:PublishSingleFile=true",
    "-p:IncludeNativeLibrariesForSelfExtract=true",
    "-p:IncludeAllContentForSelfExtract=true",
    "-p:RestoreIgnoreFailedSources=true",
    "-p:NuGetAudit=false",
  ];
  const dotnetArgs = [
    "publish",
    projectPath,
    "-c",
    "Release",
    "-r",
    runtime,
    "--self-contained",
    ...buildProperties,
    "-o",
    publishOutputDir,
  ];

  // Inherit stdio so the dotnet build output is streamed to the console.
  const { error, status } = spawnSync("dotnet", dotnetArgs, {
    cwd: repoRoot,
    stdio: "inherit",
  });

  if (error) {
    throw error;
  }
  if (status === 0) {
    return;
  }
  process.exit(status ?? 1);
}
/**
 * Copies a published binary from `publishOutputDir` into `tauriBinDir`,
 * creating the destination directory when it does not exist yet.
 *
 * @param {string} fileName File name of the binary inside the publish output.
 * @throws {Error} When the published binary is missing.
 */
function stageBinary(fileName) {
  const sourcePath = path.join(publishOutputDir, fileName);
  if (existsSync(sourcePath) === false) {
    throw new Error(`Published binary not found: ${sourcePath}`);
  }

  const destinationPath = path.join(tauriBinDir, fileName);
  mkdirSync(tauriBinDir, { recursive: true });
  copyFileSync(sourcePath, destinationPath);
  console.log(`Staged binary for Tauri: ${destinationPath}`);
}
const runtime = runtimeForCurrentPlatform();
const sidecarName = sidecarFileName();
const publishedSidecar = path.join(publishOutputDir, sidecarName);
const bundledSidecar = path.join(tauriBinDir, sidecarName);
console.log(
`Publishing sidecar for ${process.platform}/${process.arch} (${runtime})...`,
);
const publishArgs = [
"publish",
sidecarProject,
"-c",
"Release",
"-r",
runtime,
"--self-contained",
"-p:PublishSingleFile=true",
"-p:IncludeNativeLibrariesForSelfExtract=true",
"-p:RestoreIgnoreFailedSources=true",
"-p:NuGetAudit=false",
"-o",
publishOutputDir,
];
const publish = spawnSync("dotnet", publishArgs, {
cwd: repoRoot,
stdio: "inherit",
});
if (publish.error) {
throw publish.error;
}
if (publish.status !== 0) {
process.exit(publish.status ?? 1);
}
if (!existsSync(publishedSidecar)) {
throw new Error(`Published sidecar not found: ${publishedSidecar}`);
}
mkdirSync(tauriBinDir, { recursive: true });
copyFileSync(publishedSidecar, bundledSidecar);
console.log(`Staged sidecar for Tauri: ${bundledSidecar}`);
console.log("Publishing Journal.Sidecar...");
publishProject(sidecarProject, runtime);
stageBinary(sidecarName);

View File

@ -32,6 +32,28 @@ dependencies = [
"alloc-no-stdlib",
]
[[package]]
name = "alsa"
version = "0.9.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ed7572b7ba83a31e20d1b48970ee402d2e3e0537dcfe0a3ff4d6eb7508617d43"
dependencies = [
"alsa-sys",
"bitflags 2.11.0",
"cfg-if",
"libc",
]
[[package]]
name = "alsa-sys"
version = "0.3.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "db8fee663d06c4e303404ef5f40488a53e062f89ba8bfed81f42325aafad1527"
dependencies = [
"libc",
"pkg-config",
]
[[package]]
name = "android_system_properties"
version = "0.1.5"
@ -41,6 +63,56 @@ dependencies = [
"libc",
]
[[package]]
name = "anstream"
version = "0.6.21"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "43d5b281e737544384e969a5ccad3f1cdd24b48086a0fc1b2a5262a26b8f4f4a"
dependencies = [
"anstyle",
"anstyle-parse",
"anstyle-query",
"anstyle-wincon",
"colorchoice",
"is_terminal_polyfill",
"utf8parse",
]
[[package]]
name = "anstyle"
version = "1.0.13"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5192cca8006f1fd4f7237516f40fa183bb07f8fbdfedaa0036de5ea9b0b45e78"
[[package]]
name = "anstyle-parse"
version = "0.2.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4e7644824f0aa2c7b9384579234ef10eb7efb6a0deb83f9630a49594dd9c15c2"
dependencies = [
"utf8parse",
]
[[package]]
name = "anstyle-query"
version = "1.1.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "40c48f72fd53cd289104fc64099abca73db4166ad86ea0b4341abe65af83dadc"
dependencies = [
"windows-sys 0.61.2",
]
[[package]]
name = "anstyle-wincon"
version = "3.0.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "291e6a250ff86cd4a820112fb8898808a366d8f9f58ce16d1f538353ad55747d"
dependencies = [
"anstyle",
"once_cell_polyfill",
"windows-sys 0.61.2",
]
[[package]]
name = "anyhow"
version = "1.0.102"
@ -225,6 +297,24 @@ version = "0.22.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "72b3254f16251a8381aa12e40e3c4d2f0199f8c6508fbecb9d91f575e0fbb8c6"
[[package]]
name = "bindgen"
version = "0.72.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "993776b509cfb49c750f11b8f07a46fa23e0a1386ffc01fb1e7d343efc387895"
dependencies = [
"bitflags 2.11.0",
"cexpr",
"clang-sys",
"itertools",
"proc-macro2",
"quote",
"regex",
"rustc-hash",
"shlex",
"syn 2.0.117",
]
[[package]]
name = "bitflags"
version = "1.3.2"
@ -393,6 +483,8 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "aebf35691d1bfb0ac386a69bac2fde4dd276fb618cf8bf4f5318fe285e821bb2"
dependencies = [
"find-msvc-tools",
"jobserver",
"libc",
"shlex",
]
@ -402,6 +494,15 @@ version = "1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6d43a04d8753f35258c91f8ec639f792891f748a1edbd759cf1dcea3382ad83c"
[[package]]
name = "cexpr"
version = "0.6.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6fac387a98bb7c37292057cffc56d62ecb629900026402633ae9160df93a8766"
dependencies = [
"nom",
]
[[package]]
name = "cfb"
version = "0.7.3"
@ -436,11 +537,70 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c673075a2e0e5f4a1dde27ce9dee1ea4558c7ffe648f576438a20ca1d2acc4b0"
dependencies = [
"iana-time-zone",
"js-sys",
"num-traits",
"serde",
"wasm-bindgen",
"windows-link 0.2.1",
]
[[package]]
name = "clang-sys"
version = "1.8.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0b023947811758c97c59bf9d1c188fd619ad4718dcaa767947df1cadb14f39f4"
dependencies = [
"glob",
"libc",
"libloading 0.8.9",
]
[[package]]
name = "clap"
version = "4.5.60"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2797f34da339ce31042b27d23607e051786132987f595b02ba4f6a6dffb7030a"
dependencies = [
"clap_builder",
"clap_derive",
]
[[package]]
name = "clap_builder"
version = "4.5.60"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "24a241312cea5059b13574bb9b3861cabf758b879c15190b37b6d6fd63ab6876"
dependencies = [
"anstream",
"anstyle",
"clap_lex",
"strsim",
]
[[package]]
name = "clap_derive"
version = "4.5.55"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a92793da1a46a5f2a02a6f4c46c6496b28c43638adea8306fcb0caa1634f24e5"
dependencies = [
"heck 0.5.0",
"proc-macro2",
"quote",
"syn 2.0.117",
]
[[package]]
name = "clap_lex"
version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3a822ea5bc7590f9d40f1ba12c0dc3c2760f3482c6984db1573ad11031420831"
[[package]]
name = "colorchoice"
version = "1.0.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b05b61dc5112cbb17e4b6cd61790d9845d13888356391624cbe7e41efeac1e75"
[[package]]
name = "combine"
version = "4.6.7"
@ -516,6 +676,49 @@ dependencies = [
"libc",
]
[[package]]
name = "coreaudio-rs"
version = "0.11.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "321077172d79c662f64f5071a03120748d5bb652f5231570141be24cfcd2bace"
dependencies = [
"bitflags 1.3.2",
"core-foundation-sys",
"coreaudio-sys",
]
[[package]]
name = "coreaudio-sys"
version = "0.2.17"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ceec7a6067e62d6f931a2baf6f3a751f4a892595bcec1461a3c94ef9949864b6"
dependencies = [
"bindgen",
]
[[package]]
name = "cpal"
version = "0.15.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "873dab07c8f743075e57f524c583985fbaf745602acbe916a01539364369a779"
dependencies = [
"alsa",
"core-foundation-sys",
"coreaudio-rs",
"dasp_sample",
"jni",
"js-sys",
"libc",
"mach2",
"ndk 0.8.0",
"ndk-context",
"oboe",
"wasm-bindgen",
"wasm-bindgen-futures",
"web-sys",
"windows 0.54.0",
]
[[package]]
name = "cpufeatures"
version = "0.2.17"
@ -631,6 +834,12 @@ dependencies = [
"syn 2.0.117",
]
[[package]]
name = "dasp_sample"
version = "0.11.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0c87e182de0887fd5361989c677c4e8f5000cd9491d6d563161a8f3a5519fc7f"
[[package]]
name = "deranged"
version = "0.5.8"
@ -773,6 +982,12 @@ version = "1.0.20"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d0881ea181b1df73ff77ffaaf9c7544ecc11e82fba9b5f27b262a3c73a332555"
[[package]]
name = "either"
version = "1.15.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719"
[[package]]
name = "embed-resource"
version = "3.0.6"
@ -1409,6 +1624,12 @@ version = "0.4.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7f24254aa9a54b5c858eaee2f5bccdb46aaf0e486a595ed5fd8f86ba55232a70"
[[package]]
name = "hound"
version = "3.5.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "62adaabb884c94955b19907d60019f4e145d091c75345379e70d1ee696f7854f"
[[package]]
name = "html5ever"
version = "0.29.1"
@ -1719,6 +1940,21 @@ dependencies = [
"once_cell",
]
[[package]]
name = "is_terminal_polyfill"
version = "1.70.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a6cb138bb79a146c1bd460005623e142ef0181e3d0219cb493e02f7d08a35695"
[[package]]
name = "itertools"
version = "0.13.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "413ee7dfc52ee1a4949ceeb7dbc8a33f2d6c088194d9f922fb8318faf1f01186"
dependencies = [
"either",
]
[[package]]
name = "itoa"
version = "1.0.17"
@ -1770,6 +2006,16 @@ version = "0.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8eaf4bc02d17cbdd7ff4c7438cafcdf7fb9a4613313ad11b4f8fefe7d3fa0130"
[[package]]
name = "jobserver"
version = "0.1.34"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9afb3de4395d6b3e67a780b6de64b51c978ecf11cb9a462c66be7d4ca9039d33"
dependencies = [
"getrandom 0.3.4",
"libc",
]
[[package]]
name = "journalapp"
version = "0.1.0"
@ -1779,6 +2025,7 @@ dependencies = [
"tauri",
"tauri-build",
"tauri-plugin-dialog",
"tauri-plugin-mic-recorder",
"tauri-plugin-opener",
"tokio",
]
@ -1870,7 +2117,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6e9ec52138abedcc58dc17a7c6c0c00a2bdb4f3427c7f63fa97fd0d859155caf"
dependencies = [
"gtk-sys",
"libloading",
"libloading 0.7.4",
"once_cell",
]
@ -1890,6 +2137,16 @@ dependencies = [
"winapi",
]
[[package]]
name = "libloading"
version = "0.8.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d7c4b02199fee7c5d21a5ae7d8cfa79a6ef5bb2fc834d6e9058e89c825efdc55"
dependencies = [
"cfg-if",
"windows-link 0.2.1",
]
[[package]]
name = "libredox"
version = "0.1.12"
@ -1933,6 +2190,15 @@ version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c41e0c4fef86961ac6d6f8a82609f55f31b05e4fce149ac5710e439df7619ba4"
[[package]]
name = "mach2"
version = "0.4.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d640282b302c0bb0a2a8e0233ead9035e3bed871f0b7e81fe4a1ec829765db44"
dependencies = [
"libc",
]
[[package]]
name = "markup5ever"
version = "0.14.1"
@ -1985,6 +2251,12 @@ version = "0.3.17"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6877bb514081ee2a7ff5ef9de3281f14a4dd4bceac4c09388074a6b5df8a139a"
[[package]]
name = "minimal-lexical"
version = "0.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a"
[[package]]
name = "miniz_oxide"
version = "0.8.9"
@ -2027,6 +2299,20 @@ dependencies = [
"windows-sys 0.60.2",
]
[[package]]
name = "ndk"
version = "0.8.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2076a31b7010b17a38c01907c45b945e8f11495ee4dd588309718901b1f7a5b7"
dependencies = [
"bitflags 2.11.0",
"jni-sys",
"log",
"ndk-sys 0.5.0+25.2.9519653",
"num_enum",
"thiserror 1.0.69",
]
[[package]]
name = "ndk"
version = "0.9.0"
@ -2036,7 +2322,7 @@ dependencies = [
"bitflags 2.11.0",
"jni-sys",
"log",
"ndk-sys",
"ndk-sys 0.6.0+11769913",
"num_enum",
"raw-window-handle",
"thiserror 1.0.69",
@ -2048,6 +2334,15 @@ version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "27b02d87554356db9e9a873add8782d4ea6e3e58ea071a9adb9a2e8ddb884a8b"
[[package]]
name = "ndk-sys"
version = "0.5.0+25.2.9519653"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8c196769dd60fd4f363e11d948139556a344e79d451aeb2fa2fd040738ef7691"
dependencies = [
"jni-sys",
]
[[package]]
name = "ndk-sys"
version = "0.6.0+11769913"
@ -2069,12 +2364,33 @@ version = "0.1.14"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "72ef4a56884ca558e5ddb05a1d1e7e1bfd9a68d9ed024c21704cc98872dae1bb"
[[package]]
name = "nom"
version = "7.1.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d273983c5a657a70a3e8f2a01329822f3b8c8172b73826411a55751e404a0a4a"
dependencies = [
"memchr",
"minimal-lexical",
]
[[package]]
name = "num-conv"
version = "0.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cf97ec579c3c42f953ef76dbf8d55ac91fb219dde70e49aa4a6b7d74e9919050"
[[package]]
name = "num-derive"
version = "0.4.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ed3955f1a9c7c0c15e092f9c887db08b1fc683305fdf6eb6684f22555355e202"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.117",
]
[[package]]
name = "num-traits"
version = "0.2.19"
@ -2318,12 +2634,41 @@ dependencies = [
"objc2-security",
]
[[package]]
name = "oboe"
version = "0.6.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e8b61bebd49e5d43f5f8cc7ee2891c16e0f41ec7954d36bcb6c14c5e0de867fb"
dependencies = [
"jni",
"ndk 0.8.0",
"ndk-context",
"num-derive",
"num-traits",
"oboe-sys",
]
[[package]]
name = "oboe-sys"
version = "0.6.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6c8bb09a4a2b1d668170cfe0a7d5bc103f8999fb316c98099b6a9939c9f2e79d"
dependencies = [
"cc",
]
[[package]]
name = "once_cell"
version = "1.21.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d"
[[package]]
name = "once_cell_polyfill"
version = "1.70.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "384b8ab6d37215f3c5301a95a4accb5d64aa607f1fcb26a11b5303878451b4fe"
[[package]]
name = "open"
version = "5.3.3"
@ -2967,6 +3312,12 @@ dependencies = [
"windows-sys 0.60.2",
]
[[package]]
name = "rustc-hash"
version = "2.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "357703d41365b4b27c590e3ed91eabb1b663f07c4c084095e60cbed4362dff0d"
[[package]]
name = "rustc_version"
version = "0.4.1"
@ -3322,7 +3673,7 @@ checksum = "aac18da81ebbf05109ab275b157c22a653bb3c12cf884450179942f81bcbf6c3"
dependencies = [
"bytemuck",
"js-sys",
"ndk",
"ndk 0.9.0",
"objc2",
"objc2-core-foundation",
"objc2-core-graphics",
@ -3486,9 +3837,9 @@ dependencies = [
"lazy_static",
"libc",
"log",
"ndk",
"ndk 0.9.0",
"ndk-context",
"ndk-sys",
"ndk-sys 0.6.0+11769913",
"objc2",
"objc2-app-kit",
"objc2-foundation",
@ -3499,7 +3850,7 @@ dependencies = [
"tao-macros",
"unicode-segmentation",
"url",
"windows",
"windows 0.61.3",
"windows-core 0.61.2",
"windows-version",
"x11-dl",
@ -3570,7 +3921,7 @@ dependencies = [
"webkit2gtk",
"webview2-com",
"window-vibrancy",
"windows",
"windows 0.61.3",
]
[[package]]
@ -3693,6 +4044,22 @@ dependencies = [
"url",
]
[[package]]
name = "tauri-plugin-mic-recorder"
version = "2.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ceccca393df4f90abba52602125d526c71ddd0557b03ac7dd9c3f818325b6d95"
dependencies = [
"chrono",
"clap",
"cpal",
"hound",
"serde",
"tauri",
"tauri-plugin",
"thiserror 2.0.18",
]
[[package]]
name = "tauri-plugin-opener"
version = "2.5.3"
@ -3711,7 +4078,7 @@ dependencies = [
"tauri-plugin",
"thiserror 2.0.18",
"url",
"windows",
"windows 0.61.3",
"zbus",
]
@ -3737,7 +4104,7 @@ dependencies = [
"url",
"webkit2gtk",
"webview2-com",
"windows",
"windows 0.61.3",
]
[[package]]
@ -3763,7 +4130,7 @@ dependencies = [
"url",
"webkit2gtk",
"webview2-com",
"windows",
"windows 0.61.3",
"wry",
]
@ -4268,6 +4635,12 @@ version = "1.0.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b6c140620e7ffbb22c2dee59cafe6084a59b5ffc27a8859a5f0d494b5d52b6be"
[[package]]
name = "utf8parse"
version = "0.2.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821"
[[package]]
name = "uuid"
version = "1.21.0"
@ -4529,7 +4902,7 @@ checksum = "7130243a7a5b33c54a444e54842e6a9e133de08b5ad7b5861cd8ed9a6a5bc96a"
dependencies = [
"webview2-com-macros",
"webview2-com-sys",
"windows",
"windows 0.61.3",
"windows-core 0.61.2",
"windows-implement",
"windows-interface",
@ -4553,7 +4926,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "381336cfffd772377d291702245447a5251a2ffa5bad679c99e61bc48bacbf9c"
dependencies = [
"thiserror 2.0.18",
"windows",
"windows 0.61.3",
"windows-core 0.61.2",
]
@ -4603,6 +4976,16 @@ dependencies = [
"windows-version",
]
[[package]]
name = "windows"
version = "0.54.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9252e5725dbed82865af151df558e754e4a3c2c30818359eb17465f1346a1b49"
dependencies = [
"windows-core 0.54.0",
"windows-targets 0.52.6",
]
[[package]]
name = "windows"
version = "0.61.3"
@ -4625,6 +5008,16 @@ dependencies = [
"windows-core 0.61.2",
]
[[package]]
name = "windows-core"
version = "0.54.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "12661b9c89351d684a50a8a643ce5f608e20243b9fb84687800163429f161d65"
dependencies = [
"windows-result 0.1.2",
"windows-targets 0.52.6",
]
[[package]]
name = "windows-core"
version = "0.61.2"
@ -4706,6 +5099,15 @@ dependencies = [
"windows-link 0.1.3",
]
[[package]]
name = "windows-result"
version = "0.1.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5e383302e8ec8515204254685643de10811af0ed97ea37210dc26fb0032647f8"
dependencies = [
"windows-targets 0.52.6",
]
[[package]]
name = "windows-result"
version = "0.3.4"
@ -5125,7 +5527,7 @@ dependencies = [
"jni",
"kuchikiki",
"libc",
"ndk",
"ndk 0.9.0",
"objc2",
"objc2-app-kit",
"objc2-core-foundation",
@ -5143,7 +5545,7 @@ dependencies = [
"webkit2gtk",
"webkit2gtk-sys",
"webview2-com",
"windows",
"windows 0.61.3",
"windows-core 0.61.2",
"windows-version",
"x11-dl",

View File

@ -2,7 +2,7 @@
name = "journalapp"
version = "0.1.0"
description = "A Tauri App"
authors = ["Stan"]
authors = ["Stan", "J. Schmidt"]
edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
@ -23,4 +23,5 @@ tauri-plugin-dialog = "2"
tauri-plugin-opener = "2"
serde = { version = "1", features = ["derive"] }
serde_json = "1"
tokio = { version = "1", features = ["process", "io-util", "sync"] }
tokio = { version = "1", features = ["process", "io-util", "sync", "time"] }
tauri-plugin-mic-recorder = "2.0.0"

View File

@ -3,5 +3,10 @@
"identifier": "default",
"description": "Capability for the main window",
"windows": ["main"],
"permissions": ["core:default", "dialog:default", "opener:default"]
"permissions": [
"core:default",
"dialog:default",
"opener:default",
"mic-recorder:default"
]
}

View File

@ -5,7 +5,7 @@ use std::env;
use std::fs;
use std::path::{Path, PathBuf};
use std::process::Stdio;
use tauri::Manager;
use tauri::{Emitter, Manager};
use tokio::io::{AsyncBufReadExt, AsyncWriteExt, BufReader};
use tokio::process::{Child, ChildStdin, ChildStdout, Command};
use tokio::sync::Mutex;
@ -109,6 +109,10 @@ struct ManagedSidecar {
stdout: BufReader<ChildStdout>,
}
/// Handle to the background task that polls the sidecar for live speech output.
struct ManagedSpeechProcess {
// Join handle of the tokio task spawned in `speech_start`; it is aborted when
// dictation is stopped.
poll_task: tokio::task::JoinHandle<()>,
}
impl ManagedSidecar {
fn start(root: &Path, resource_dir: Option<&Path>) -> Result<Self, String> {
let sidecar_path = resolve_sidecar_path(root, resource_dir)?;
@ -182,8 +186,15 @@ impl Drop for ManagedSidecar {
fn drop(&mut self) {}
}
impl ManagedSpeechProcess {
    /// Returns `true` while the polling task has not finished yet.
    fn is_running(&self) -> bool {
        let finished = self.poll_task.is_finished();
        !finished
    }
}
struct SidecarState {
process: Mutex<Option<ManagedSidecar>>,
speech_process: Mutex<Option<ManagedSpeechProcess>>,
root_override: Mutex<Option<PathBuf>>,
config_path: PathBuf,
resource_dir: Option<PathBuf>,
@ -237,6 +248,11 @@ fn resolve_sidecar_path(root: &Path, resource_dir: Option<&Path>) -> Result<Path
return Ok(root_exe_path);
}
let tauri_bin_sidecar_path = root.join("Journal.App").join("src-tauri").join("bin").join(exe_name);
if tauri_bin_sidecar_path.exists() {
return Ok(tauri_bin_sidecar_path);
}
let sidecar_src_root = root.join("Journal.Sidecar");
if let Some(path) = find_sidecar_executable(&sidecar_src_root, exe_name) {
return Ok(path);
@ -255,6 +271,15 @@ fn resolve_sidecar_path(root: &Path, resource_dir: Option<&Path>) -> Result<Path
))
}
/// Parses one response line from the sidecar as JSON.
///
/// Returns the parsed value, or a human-readable error string when the line
/// is not valid JSON.
fn parse_command_response(response_line: &str) -> Result<Value, String> {
    match serde_json::from_str::<Value>(response_line) {
        Ok(parsed) => Ok(parsed),
        Err(err) => Err(format!("Invalid sidecar JSON response: {err}")),
    }
}
/// Looks up a field under its camelCase key, falling back to the PascalCase
/// key when the camelCase one is absent.
fn read_field<'a>(data: &'a Value, camel: &str, pascal: &str) -> Option<&'a Value> {
    match data.get(camel) {
        Some(found) => Some(found),
        None => data.get(pascal),
    }
}
fn find_sidecar_executable(search_root: &Path, exe_name: &str) -> Option<PathBuf> {
if !search_root.is_dir() {
return None;
@ -332,11 +357,52 @@ async fn send_with_managed_sidecar(
Err("Failed to send command to sidecar.".to_string())
}
async fn send_sidecar_action(
state: &SidecarState,
action: &str,
payload: Option<Value>,
) -> Result<Value, String> {
let envelope = serde_json::json!({
"action": action,
"payload": payload.unwrap_or_else(|| serde_json::json!({}))
});
let input_line = serde_json::to_string(&envelope)
.map_err(|err| format!("Serialize command failed: {err}"))?;
let response_line = send_with_managed_sidecar(state, &input_line).await?;
let response = parse_command_response(&response_line)?;
let ok = response
.get("ok")
.and_then(|node| node.as_bool())
.unwrap_or(false);
if !ok {
let err = response
.get("error")
.and_then(|node| node.as_str())
.unwrap_or("Sidecar command failed.");
return Err(err.to_string());
}
Ok(response
.get("data")
.cloned()
.unwrap_or_else(|| serde_json::json!({})))
}
/// Clears the managed sidecar slot, dropping the stored handle if there is one.
async fn stop_managed_sidecar(state: &SidecarState) {
    let mut slot = state.process.lock().await;
    // Drop the handle while the lock is still held, as before.
    drop(slot.take());
}
/// Removes the stored speech poll task, if any, and aborts it.
///
/// Always returns `Ok(())`; the `Result` return type lets callers use `?`.
///
/// NOTE(review): the abort may cancel the task while it is awaiting a sidecar
/// reply — confirm `send_with_managed_sidecar` tolerates a cancelled exchange.
async fn stop_speech_process(state: &SidecarState) -> Result<(), String> {
    let mut slot = state.speech_process.lock().await;
    match slot.take() {
        Some(speech) => speech.poll_task.abort(),
        None => {}
    }
    Ok(())
}
#[tauri::command]
async fn get_sidecar_root(state: tauri::State<'_, SidecarState>) -> Result<Value, String> {
let root_override = state.root_override.lock().await.clone();
@ -425,11 +491,151 @@ async fn shutdown(
state: tauri::State<'_, SidecarState>,
app_handle: tauri::AppHandle,
) -> Result<(), String> {
stop_speech_process(state.inner()).await?;
let _ = send_sidecar_action(state.inner(), "speech.live.stop", None).await;
stop_managed_sidecar(state.inner()).await;
app_handle.exit(0);
Ok(())
}
/// Starts live dictation in the sidecar and spawns the task that relays its output.
///
/// Sends `speech.live.start` to the sidecar. When the sidecar reports that it is
/// running, a background task polls `speech.live.poll` (with a 350 ms pause
/// between polls), emitting a `speech-transcript` event for every text item and
/// a `speech-status` event after every poll. The task ends when a poll fails or
/// the sidecar reports it is no longer running.
///
/// Returns `{ "running": true }` on success, including when a poll task is
/// already alive. Returns `Err` with the sidecar's warning (or a generic
/// message) when the sidecar does not report a running state.
#[tauri::command]
async fn speech_start(
    state: tauri::State<'_, SidecarState>,
    app_handle: tauri::AppHandle,
) -> Result<Value, String> {
    // Short-circuit before announcing anything: when dictation is already
    // running there is nothing to start, so no "starting" status is emitted.
    {
        let guard = state.speech_process.lock().await;
        if let Some(existing) = guard.as_ref() {
            if existing.is_running() {
                return Ok(serde_json::json!({ "running": true }));
            }
        }
    }

    let _ = app_handle.emit(
        "speech-status",
        serde_json::json!({ "state": "starting", "message": "Starting speech process..." }),
    );

    let start_data = send_sidecar_action(state.inner(), "speech.live.start", None).await?;
    let running = read_field(&start_data, "running", "Running")
        .and_then(|node| node.as_bool())
        .unwrap_or(false);
    let status = read_field(&start_data, "status", "Status")
        .and_then(|node| node.as_str())
        .unwrap_or("starting");
    let warning = read_field(&start_data, "warning", "Warning")
        .and_then(|node| node.as_str())
        .map(|v| v.to_string());

    // Forward the sidecar's own status; a warning takes precedence as the message.
    let _ = app_handle.emit(
        "speech-status",
        serde_json::json!({ "state": status, "message": warning.clone().unwrap_or_else(|| status.to_string()) }),
    );

    if !running {
        return Err(warning.unwrap_or_else(|| "Failed to start live speech.".to_string()));
    }

    let app_for_poll = app_handle.clone();
    let poll_task = tokio::spawn(async move {
        loop {
            let state_handle = app_for_poll.state::<SidecarState>();
            let poll_data = match send_sidecar_action(
                state_handle.inner(),
                "speech.live.poll",
                Some(serde_json::json!({ "maxItems": 8 })),
            )
            .await
            {
                Ok(value) => value,
                Err(err) => {
                    // A failed poll ends the loop; the frontend is told via an error status.
                    let _ = app_for_poll.emit(
                        "speech-status",
                        serde_json::json!({ "state": "error", "message": err }),
                    );
                    break;
                }
            };

            // Relay each transcript item; non-string items are ignored.
            if let Some(items) =
                read_field(&poll_data, "items", "Items").and_then(|node| node.as_array())
            {
                for item in items {
                    if let Some(text) = item.as_str() {
                        let _ = app_for_poll
                            .emit("speech-transcript", serde_json::json!({ "text": text }));
                    }
                }
            }

            let running = read_field(&poll_data, "running", "Running")
                .and_then(|node| node.as_bool())
                .unwrap_or(false);
            let status = read_field(&poll_data, "status", "Status")
                .and_then(|node| node.as_str())
                .unwrap_or(if running { "listening" } else { "stopped" });
            let warning = read_field(&poll_data, "warning", "Warning")
                .and_then(|node| node.as_str())
                .map(|v| v.to_string());

            if let Some(message) = warning {
                // A warning while still running is informational; otherwise it is an error.
                let _ = app_for_poll.emit(
                    "speech-status",
                    serde_json::json!({ "state": if running { "listening" } else { "error" }, "message": message }),
                );
            } else {
                let _ = app_for_poll.emit(
                    "speech-status",
                    serde_json::json!({ "state": status, "message": status }),
                );
            }

            if !running {
                break;
            }

            tokio::time::sleep(std::time::Duration::from_millis(350)).await;
        }
    });

    // The running check above and this store are separate lock acquisitions, so
    // an overlapping start may already have stored a task. Abort the one being
    // replaced so it cannot keep polling with no handle left to stop it.
    let mut guard = state.speech_process.lock().await;
    if let Some(previous) = guard.replace(ManagedSpeechProcess { poll_task }) {
        previous.poll_task.abort();
    }

    Ok(serde_json::json!({ "running": true }))
}
/// Stops live dictation.
///
/// Aborts the local poll task, asks the sidecar to stop via `speech.live.stop`
/// (its result is ignored), emits a `stopped` `speech-status` event, and
/// returns `{ "running": false }`.
#[tauri::command]
async fn speech_stop(
    state: tauri::State<'_, SidecarState>,
    app_handle: tauri::AppHandle,
) -> Result<Value, String> {
    let sidecar_state = state.inner();
    stop_speech_process(sidecar_state).await?;

    // Best-effort: a failing stop request must not prevent reporting "stopped".
    let _ = send_sidecar_action(sidecar_state, "speech.live.stop", None).await;

    let stopped_status =
        serde_json::json!({ "state": "stopped", "message": "Dictation stopped." });
    let _ = app_handle.emit("speech-status", stopped_status);

    let result = serde_json::json!({ "running": false });
    Ok(result)
}
/// Deletes the temporary WAV recording produced by the microphone-access probe.
///
/// The path comes from the frontend, so it is only deleted when it looks like a
/// recorder output: its lowercased form must contain `tauri-plugin-mic-recorder`
/// and end in `.wav`, and it must not contain any `..` component.
///
/// Returns `{ "deleted": true }` when the file was removed and
/// `{ "deleted": false }` when the path was empty, rejected, or already gone.
/// Any other removal failure is returned as `Err`.
#[tauri::command]
async fn speech_cleanup_probe(path: String) -> Result<Value, String> {
    let not_deleted = || -> Result<Value, String> { Ok(serde_json::json!({ "deleted": false })) };

    if path.trim().is_empty() {
        return not_deleted();
    }

    let target = PathBuf::from(path);

    // The substring check alone would accept e.g. `<recorder dir>/../../x.wav`;
    // refuse any path that climbs out of a directory via `..`.
    // NOTE(review): assumes the recorder plugin never returns paths containing `..` — confirm.
    let climbs_out = target
        .components()
        .any(|part| matches!(part, std::path::Component::ParentDir));
    if climbs_out {
        return not_deleted();
    }

    let normalized = target.to_string_lossy().to_lowercase();
    if !normalized.contains("tauri-plugin-mic-recorder") || !normalized.ends_with(".wav") {
        return not_deleted();
    }

    // Remove directly instead of checking `exists()` first, so a file that
    // disappears between the check and the removal is not reported as an error.
    match fs::remove_file(&target) {
        Ok(()) => Ok(serde_json::json!({ "deleted": true })),
        Err(err) if err.kind() == std::io::ErrorKind::NotFound => not_deleted(),
        Err(err) => Err(format!("Failed to remove probe recording: {err}")),
    }
}
#[tauri::command]
async fn sidecar_command(
state: tauri::State<'_, SidecarState>,
@ -442,18 +648,21 @@ async fn sidecar_command(
let input_line = serde_json::to_string(&command)
.map_err(|err| format!("Serialize command failed: {err}"))?;
let response_line = send_with_managed_sidecar(state.inner(), &input_line).await?;
serde_json::from_str::<Value>(&response_line)
.map_err(|err| format!("Invalid sidecar JSON response: {err}"))
parse_command_response(&response_line)
}
#[cfg_attr(mobile, tauri::mobile_entry_point)]
pub fn run() {
let app = tauri::Builder::default()
.plugin(tauri_plugin_mic_recorder::init())
.plugin(tauri_plugin_dialog::init())
.plugin(tauri_plugin_opener::init())
.invoke_handler(tauri::generate_handler![
sidecar_command,
shutdown,
speech_start,
speech_stop,
speech_cleanup_probe,
get_sidecar_root,
set_sidecar_root,
get_ui_settings,
@ -468,6 +677,7 @@ pub fn run() {
app.manage(SidecarState {
process: Mutex::new(None),
speech_process: Mutex::new(None),
root_override: Mutex::new(root_override),
config_path,
resource_dir: app.path().resource_dir().ok(),
@ -483,6 +693,11 @@ pub fn run() {
if let Ok(mut guard) = state.process.try_lock() {
guard.take();
};
if let Ok(mut guard) = state.speech_process.try_lock() {
if let Some(speech) = guard.take() {
speech.poll_task.abort();
}
};
}
});
}

View File

@ -2,7 +2,7 @@
"$schema": "https://schema.tauri.app/config/2",
"productName": "Project Journal",
"version": "0.1.0",
"identifier": "com.stan.journal",
"identifier": "com.idsolutions.journal",
"build": {
"beforeDevCommand": "npm run dev",
"devUrl": "http://localhost:1420",

View File

@ -0,0 +1,33 @@
import { invoke } from "$lib/runtime/invoke";
import {
startRecording as startMicRecording,
stopRecording as stopMicRecording,
} from "tauri-plugin-mic-recorder-api";
/** Result shape expected from the `speech_start` / `speech_stop` commands. */
type SpeechControlResult = {
/** Whether dictation is running after the command completed. */
running: boolean;
// NOTE(review): `pid` and `launch` are optional; confirm whether the backend
// commands ever populate them.
pid?: number;
launch?: string;
};
/** Invokes the `speech_start` backend command and resolves with its result. */
export async function startSpeechDictation(): Promise<SpeechControlResult> {
  const result = await invoke<SpeechControlResult>("speech_start");
  return result;
}
/** Invokes the `speech_stop` backend command and resolves with its result. */
export async function stopSpeechDictation(): Promise<SpeechControlResult> {
  const result = await invoke<SpeechControlResult>("speech_stop");
  return result;
}
/**
 * Makes a short (300 ms) microphone recording, then asks the backend to delete
 * the resulting file via `speech_cleanup_probe`.
 *
 * Rejects when starting or stopping the recording fails; a failed cleanup is
 * ignored.
 *
 * @returns The path reported by the recorder for the probe recording.
 */
export async function probeMicrophoneAccess(): Promise<string> {
  const pause = (ms: number) =>
    new Promise((resolve) => setTimeout(resolve, ms));

  await startMicRecording();
  await pause(300);
  const recordingPath = await stopMicRecording();

  try {
    await invoke<{ deleted: boolean }>("speech_cleanup_probe", {
      path: recordingPath,
    });
  } catch {
    // Keep probe non-blocking; cleanup failure should not break dictation start.
  }

  return recordingPath;
}

View File

@ -1,5 +1,11 @@
<!-- @format -->
<script lang="ts">
import {
probeMicrophoneAccess,
startSpeechDictation,
stopSpeechDictation,
} from "$lib/backend/speech";
import { isTauriRuntime } from "$lib/runtime/invoke";
import {
createFragmentFromParsed,
deleteFragmentByStoreId,
@ -12,6 +18,7 @@
} from "$lib/stores/fragments";
import { settingsFragmentTypes, settingsTags } from "$lib/stores/settings";
import { renderMarkdown } from "$lib/utils/markdown";
import { onDestroy, onMount } from "svelte";
import { get } from "svelte/store";
export let openDocumentId = "";
@ -37,6 +44,12 @@
let fragmentTypeOptions: string[] = [];
let tagOptions: string[] = [];
let suppressExternalEditRequest = false;
let dictationBusy = false;
let dictationActive = false;
let dictationError = "";
let dictationStatus = "";
let unlistenTranscript: (() => void) | null = null;
let unlistenSpeechStatus: (() => void) | null = null;
const customTypeValue = "__custom_type__";
const customTagValue = "__custom_tag__";
@ -145,6 +158,9 @@
}
function cancelFragmentEdit() {
if (dictationActive) {
void stopDictation();
}
if (fragmentMode === "create") {
fragmentMode = "view";
suppressExternalEditRequest = true;
@ -195,6 +211,149 @@
fragmentMode = isDraftFragment ? "create" : "view";
}
/**
 * Appends a dictated chunk to `fragmentBody`, followed by a trailing space.
 *
 * Whitespace-only chunks are ignored. A single space is inserted before the
 * chunk when the body is non-empty and does not already end in a space,
 * newline, or tab.
 */
function appendDictationChunk(text: string) {
  const chunk = text.trim();
  if (chunk.length === 0) return;

  const separators = [" ", "\n", "\t"];
  const endsWithSeparator = separators.some((separator) =>
    fragmentBody.endsWith(separator),
  );
  const needsSeparator = fragmentBody.length > 0 && !endsWithSeparator;

  fragmentBody = fragmentBody + (needsSeparator ? " " : "") + chunk + " ";
}
/**
 * Starts live dictation: probes microphone access, then asks the backend to
 * start the speech process, giving up after 8 seconds.
 *
 * State changes: sets `dictationBusy` for the duration of the call, clears
 * `dictationError`, updates `dictationStatus`, and sets `dictationActive`
 * once the start call resolves. Errors are reported through `dictationError`
 * rather than thrown.
 */
async function startDictation() {
  if (dictationBusy || dictationActive) return;
  if (!isTauriRuntime()) {
    dictationError = "Speech dictation is available in the desktop app only.";
    return;
  }
  dictationBusy = true;
  dictationError = "";
  dictationStatus = "Checking microphone access...";

  let startupTimer: ReturnType<typeof setTimeout> | undefined;
  let startAttempt: ReturnType<typeof startSpeechDictation> | undefined;
  try {
    await probeMicrophoneAccess();
    dictationStatus = "Starting dictation...";
    startAttempt = startSpeechDictation();
    const startupTimeout = new Promise<never>((_, reject) => {
      startupTimer = setTimeout(
        () =>
          reject(new Error("Timed out waiting for speech process startup.")),
        8000,
      );
    });
    const started = await Promise.race([startAttempt, startupTimeout]);
    dictationActive = true;
    if (started?.pid) {
      dictationStatus = "Dictation started.";
    }
  } catch (error) {
    dictationError = String(error);
    dictationActive = false;
    if (startAttempt) {
      // The start call may still succeed after the timeout fired. Without
      // this, the speech process would keep running while the UI shows
      // dictation as inactive. Only stop it if no newer start/stop is in
      // flight and dictation has not become active in the meantime.
      const abandonedStart = startAttempt;
      void (async () => {
        try {
          await abandonedStart;
          if (!dictationActive && !dictationBusy) {
            await stopSpeechDictation();
          }
        } catch {
          // The start itself failed, or best-effort cleanup failed; the
          // original error is already shown to the user.
        }
      })();
    }
  } finally {
    // Do not leave the timeout timer pending once the race has settled.
    if (startupTimer !== undefined) {
      clearTimeout(startupTimer);
    }
    dictationBusy = false;
  }
}
/**
 * Stops live dictation if it is active and no other start/stop is running.
 * Whatever the backend call returns or throws, dictation is marked inactive
 * afterwards and the status reads "Dictation stopped."; a failure is
 * surfaced through `dictationError`.
 */
async function stopDictation() {
  const canStop = dictationActive && !dictationBusy;
  if (!canStop) return;

  dictationBusy = true;
  dictationError = "";
  try {
    await stopSpeechDictation();
  } catch (failure) {
    dictationError = String(failure);
  } finally {
    dictationActive = false;
    dictationStatus = "Dictation stopped.";
    dictationBusy = false;
  }
}
/** Stops dictation when it is active, otherwise starts it. */
async function toggleDictation() {
  const action = dictationActive ? stopDictation : startDictation;
  await action();
}
// Subscribes to the backend "speech-transcript" and "speech-status" events
// while the component is mounted (desktop runtime only) and removes both
// subscriptions on unmount.
onMount(() => {
  if (!isTauriRuntime()) return;
  let disposed = false;

  void (async () => {
    const { listen } = await import("@tauri-apps/api/event");

    const unlisten = await listen<{ text?: string }>(
      "speech-transcript",
      (event) => {
        if (disposed || !dictationActive) return;
        // Transcripts are only applied while the fragment form is open.
        if (fragmentMode !== "edit" && fragmentMode !== "create") return;
        const text = event.payload?.text ?? "";
        appendDictationChunk(text);
      },
    );
    if (disposed) {
      unlisten();
      return;
    }
    // Publish each handle as soon as it exists so the unmount cleanup can
    // release it even if the component is destroyed during the next await.
    unlistenTranscript = unlisten;

    const unlistenStatus = await listen<{ state?: string; message?: string }>(
      "speech-status",
      (event) => {
        const state = event.payload?.state ?? "";
        const message = event.payload?.message ?? "";
        if (message) {
          dictationStatus = message;
        }
        if (state === "listening") {
          dictationError = "";
          return;
        }
        if (state === "error") {
          dictationError = message || "Speech process error.";
          dictationActive = false;
          return;
        }
        if (state === "stopped") {
          dictationActive = false;
        }
      },
    );
    if (disposed) {
      // The component was destroyed while this subscription was being set
      // up; the unmount cleanup has already run, so release it here.
      unlistenStatus();
      return;
    }
    unlistenSpeechStatus = unlistenStatus;
  })().catch((error) => {
    // A failed import/subscription would otherwise be an unhandled rejection
    // and dictation would silently never receive events.
    if (!disposed) {
      dictationError = String(error);
    }
  });

  return () => {
    disposed = true;
    if (unlistenTranscript) {
      unlistenTranscript();
      unlistenTranscript = null;
    }
    if (unlistenSpeechStatus) {
      unlistenSpeechStatus();
      unlistenSpeechStatus = null;
    }
  };
});
// Request a dictation stop when the component goes away while dictation is
// still marked active; the stop runs in the background and is not awaited.
onDestroy(() => {
  if (!dictationActive) return;
  void stopDictation();
});
$: fragmentTypeOptions = $settingsFragmentTypes.length
? $settingsFragmentTypes
: ["General"];
@ -226,9 +385,22 @@
) {
fragmentMode = "edit";
}
// Reactive guard: whenever the fragment switches to "view" mode while
// dictation is still active, request a stop (fire-and-forget).
$: if (fragmentMode === "view" && dictationActive) {
void stopDictation();
}
</script>
<section class="fragment-surface">
{#if dictationError || dictationStatus}
<div
class="dictation-indicator"
class:is-error={Boolean(dictationError)}
role="status"
aria-live="polite"
>
{dictationError || dictationStatus}
</div>
{/if}
{#if fragmentMode === "view"}
<article class="fragment-view">
{@html renderMarkdown(openDocumentContent)}
@ -297,6 +469,17 @@
aria-label="Fragment body"
></textarea>
<div class="fragment-actions">
<button
type="button"
class="fragment-secondary"
class:is-active={dictationActive}
on:click={() => void toggleDictation()}
disabled={dictationBusy || !isTauriRuntime()}
aria-label={dictationActive ? "Stop dictation" : "Start dictation"}
title={dictationActive ? "Stop dictation" : "Start dictation"}
>
{dictationActive ? "Stop Dictation" : "Start Dictation"}
</button>
<button type="submit" class="fragment-submit"
>{fragmentMode === "create"
? "Create Fragment"
@ -318,6 +501,31 @@
flex: 1;
overflow: auto;
padding: 0 14px 14px;
position: relative;
}
.dictation-indicator {
position: absolute;
top: 8px;
right: 22px;
z-index: 30;
border: 1px solid var(--border-soft);
border-radius: 8px;
background: color-mix(in srgb, var(--surface-1) 94%, var(--bg-editor) 6%);
color: var(--text-dim);
font-size: 0.76rem;
line-height: 1.35;
padding: 6px 9px;
max-width: min(56ch, 60%);
box-shadow: 0 8px 20px
color-mix(in srgb, var(--bg-app) 35%, transparent 65%);
pointer-events: none;
}
.dictation-indicator.is-error {
color: #fca5a5;
border-color: color-mix(in srgb, #ef4444 45%, var(--border-soft) 55%);
background: color-mix(in srgb, #2a1414 52%, var(--surface-1) 48%);
}
.fragment-form {
@ -469,11 +677,23 @@
color: var(--text-primary);
}
.fragment-secondary.is-active {
border-color: var(--border-strong);
color: var(--text-primary);
background: var(--bg-active);
}
@media (max-width: 980px) {
.fragment-surface {
padding: 4px 8px 10px;
}
.dictation-indicator {
right: 10px;
top: 6px;
max-width: calc(100% - 20px);
}
.fragment-form,
.fragment-view {
width: 100%;

View File

@ -5,7 +5,13 @@
loadEntryTemplate,
type EntryTemplateItemDto,
} from "$lib/backend/templates";
import {
probeMicrophoneAccess,
startSpeechDictation,
stopSpeechDictation,
} from "$lib/backend/speech";
import MarkdownToolbar from "$lib/components/editor/MarkdownToolbar.svelte";
import { isTauriRuntime } from "$lib/runtime/invoke";
import { entriesStore } from "$lib/stores/entries";
import { fragmentsStore } from "$lib/stores/fragments";
import { listsStore } from "$lib/stores/lists";
@ -15,7 +21,7 @@
todosStore,
} from "$lib/stores/todos";
import { renderMarkdown, extractEditorTitle } from "$lib/utils/markdown";
import { onMount } from "svelte";
import { onDestroy, onMount } from "svelte";
import { get } from "svelte/store";
export let openDocumentId = "";
@ -49,6 +55,12 @@
let listMode: ListMode = null;
let isManualSaving = false;
let manualSaveError = "";
let dictationBusy = false;
let dictationActive = false;
let dictationError = "";
let dictationStatus = "";
let unlistenTranscript: (() => void) | null = null;
let unlistenSpeechStatus: (() => void) | null = null;
let fragmentAttachmentOptions: AttachmentOption[] = [];
let listAttachmentOptions: AttachmentOption[] = [];
let todoAttachmentOptions: AttachmentOption[] = [];
@ -451,10 +463,188 @@
}
}
/**
 * Inserts a dictated chunk into the markdown draft.
 *
 * When the editor input element is focused, the chunk replaces the current
 * selection (or is inserted at the caret) and the caret is moved to the end
 * of the inserted text. Otherwise the chunk is appended at the end of the
 * draft. The chunk always gets a trailing space, and a leading space is added
 * when the preceding character is not whitespace. Blank chunks are ignored.
 */
function insertDictationChunk(text: string) {
const cleaned = text.trim();
if (!cleaned) return;
const current = markdownText;
const insertionTail = `${cleaned} `;
// No editor element available: fall back to a plain append on the draft.
if (!editorInput) {
const prefix =
current.length > 0 &&
!current.endsWith(" ") &&
!current.endsWith("\n") &&
!current.endsWith("\t")
? " "
: "";
updateDraft(`${current}${prefix}${insertionTail}`);
return;
}
const activeElement = document.activeElement;
const isFocused = activeElement === editorInput;
// Capture scroll offsets before the draft changes so they can be restored.
const previousScrollTop = editorInput.scrollTop;
const previousScrollLeft = editorInput.scrollLeft;
let start = editorInput.selectionStart ?? current.length;
let end = editorInput.selectionEnd ?? start;
// An unfocused editor's selection is ignored; append at the end instead.
if (!isFocused) {
start = current.length;
end = current.length;
}
// Separate from the preceding text unless it already ends in whitespace.
const prefix =
start > 0 && !/\s/.test(current.charAt(Math.max(0, start - 1)))
? " "
: "";
const insertion = `${prefix}${insertionTail}`;
const next = `${current.slice(0, start)}${insertion}${current.slice(end)}`;
updateDraft(next);
// Deferred to a microtask after updateDraft; presumably so the element
// already reflects the new value when the caret is set — TODO confirm.
queueMicrotask(() => {
if (!editorInput) return;
if (isFocused) {
const cursor = start + insertion.length;
editorInput.setSelectionRange(cursor, cursor);
}
editorInput.scrollTop = previousScrollTop;
editorInput.scrollLeft = previousScrollLeft;
});
}
/**
 * Starts live dictation: probes microphone access, then asks the backend to
 * start the speech process, giving up after 8 seconds.
 *
 * State changes: sets `dictationBusy` for the duration of the call, clears
 * `dictationError`, updates `dictationStatus`, and sets `dictationActive`
 * once the start call resolves. Errors are reported through `dictationError`
 * rather than thrown.
 */
async function startDictation() {
  if (dictationBusy || dictationActive) return;
  if (!isTauriRuntime()) {
    dictationError = "Speech dictation is available in the desktop app only.";
    return;
  }
  dictationBusy = true;
  dictationError = "";
  dictationStatus = "Checking microphone access...";

  let startupTimer: ReturnType<typeof setTimeout> | undefined;
  let startAttempt: ReturnType<typeof startSpeechDictation> | undefined;
  try {
    await probeMicrophoneAccess();
    dictationStatus = "Starting dictation...";
    startAttempt = startSpeechDictation();
    const startupTimeout = new Promise<never>((_, reject) => {
      startupTimer = setTimeout(
        () =>
          reject(new Error("Timed out waiting for speech process startup.")),
        8000,
      );
    });
    const started = await Promise.race([startAttempt, startupTimeout]);
    dictationActive = true;
    if (started?.pid) {
      dictationStatus = "Dictation started.";
    }
  } catch (error) {
    dictationError = String(error);
    dictationActive = false;
    if (startAttempt) {
      // The start call may still succeed after the timeout fired. Without
      // this, the speech process would keep running while the UI shows
      // dictation as inactive. Only stop it if no newer start/stop is in
      // flight and dictation has not become active in the meantime.
      const abandonedStart = startAttempt;
      void (async () => {
        try {
          await abandonedStart;
          if (!dictationActive && !dictationBusy) {
            await stopSpeechDictation();
          }
        } catch {
          // The start itself failed, or best-effort cleanup failed; the
          // original error is already shown to the user.
        }
      })();
    }
  } finally {
    // Do not leave the timeout timer pending once the race has settled.
    if (startupTimer !== undefined) {
      clearTimeout(startupTimer);
    }
    dictationBusy = false;
  }
}
/**
 * Stops live dictation if it is active and no other start/stop is running.
 * Whatever the backend call returns or throws, dictation is marked inactive
 * afterwards and the status reads "Dictation stopped."; a failure is
 * surfaced through `dictationError`.
 */
async function stopDictation() {
  const canStop = dictationActive && !dictationBusy;
  if (!canStop) return;

  dictationBusy = true;
  dictationError = "";
  try {
    await stopSpeechDictation();
  } catch (failure) {
    dictationError = String(failure);
  } finally {
    dictationActive = false;
    dictationStatus = "Dictation stopped.";
    dictationBusy = false;
  }
}
/** Stops dictation when it is active, otherwise starts it. */
async function toggleDictation() {
  const action = dictationActive ? stopDictation : startDictation;
  await action();
}
// Kick off a template refresh when the component mounts; the call is
// fire-and-forget, so mounting does not wait for it to finish.
onMount(() => {
void refreshTemplates();
});
// Subscribes to the backend "speech-transcript" and "speech-status" events
// while the component is mounted (desktop runtime only) and removes both
// subscriptions on unmount.
onMount(() => {
  if (!isTauriRuntime()) return;
  let disposed = false;

  void (async () => {
    const { listen } = await import("@tauri-apps/api/event");

    const unlisten = await listen<{ text?: string }>(
      "speech-transcript",
      (event) => {
        if (disposed || !dictationActive) return;
        const text = event.payload?.text ?? "";
        insertDictationChunk(text);
      },
    );
    if (disposed) {
      unlisten();
      return;
    }
    // Publish each handle as soon as it exists so the unmount cleanup can
    // release it even if the component is destroyed during the next await.
    unlistenTranscript = unlisten;

    const unlistenStatus = await listen<{ state?: string; message?: string }>(
      "speech-status",
      (event) => {
        const state = event.payload?.state ?? "";
        const message = event.payload?.message ?? "";
        if (message) {
          dictationStatus = message;
        }
        if (state === "listening") {
          dictationError = "";
          return;
        }
        if (state === "error") {
          dictationError = message || "Speech process error.";
          dictationActive = false;
          return;
        }
        if (state === "stopped") {
          dictationActive = false;
        }
      },
    );
    if (disposed) {
      // The component was destroyed while this subscription was being set
      // up; the unmount cleanup has already run, so release it here.
      unlistenStatus();
      return;
    }
    unlistenSpeechStatus = unlistenStatus;
  })().catch((error) => {
    // A failed import/subscription would otherwise be an unhandled rejection
    // and dictation would silently never receive events.
    if (!disposed) {
      dictationError = String(error);
    }
  });

  return () => {
    disposed = true;
    if (unlistenTranscript) {
      unlistenTranscript();
      unlistenTranscript = null;
    }
    if (unlistenSpeechStatus) {
      unlistenSpeechStatus();
      unlistenSpeechStatus = null;
    }
  };
});
// Request a dictation stop when the component goes away while dictation is
// still marked active; the stop runs in the background and is not awaited.
onDestroy(() => {
  if (!dictationActive) return;
  void stopDictation();
});
$: if (!previewOnly && !templateRefreshRequested) {
templateRefreshRequested = true;
void refreshTemplates();
@ -490,6 +680,16 @@
</header>
<section class="editor-surface" class:preview-only={previewOnly}>
{#if !previewOnly && (dictationError || dictationStatus)}
<div
class="dictation-indicator"
class:is-error={Boolean(dictationError)}
role="status"
aria-live="polite"
>
{dictationError || dictationStatus}
</div>
{/if}
{#if !previewOnly}
<MarkdownToolbar
{isEntryDocument}
@ -511,6 +711,10 @@
onToggleUl={() => toggleListMode("ul")}
onToggleOl={() => toggleListMode("ol")}
onCode={() => applyWrap("`")}
onToggleDictation={() => void toggleDictation()}
{dictationActive}
{dictationBusy}
dictationUnavailable={!isTauriRuntime()}
onSave={() => void handleManualSave()}
saveBusy={isManualSaving}
/>
@ -667,6 +871,7 @@
display: grid;
grid-template-rows: auto minmax(0, 1fr);
gap: 10px;
position: relative;
}
.editor-surface.preview-only {
@ -680,6 +885,30 @@
padding: 0 14px;
}
.dictation-indicator {
position: absolute;
top: 8px;
right: 22px;
z-index: 30;
border: 1px solid var(--border-soft);
border-radius: 8px;
background: color-mix(in srgb, var(--surface-1) 94%, var(--bg-editor) 6%);
color: var(--text-dim);
font-size: 0.76rem;
line-height: 1.35;
padding: 6px 9px;
max-width: min(56ch, 60%);
box-shadow: 0 8px 20px
color-mix(in srgb, var(--bg-app) 35%, transparent 65%);
pointer-events: none;
}
.dictation-indicator.is-error {
color: #fca5a5;
border-color: color-mix(in srgb, #ef4444 45%, var(--border-soft) 55%);
background: color-mix(in srgb, #2a1414 52%, var(--surface-1) 48%);
}
.attachment-modal-backdrop {
position: fixed;
inset: 0;
@ -888,6 +1117,12 @@
padding: 0 8px;
}
.dictation-indicator {
right: 10px;
top: 6px;
max-width: calc(100% - 20px);
}
.markdown-input,
.markdown-preview {
width: 100%;

View File

@ -22,6 +22,10 @@
export let onCode: () => void = () => {};
export let onSave: () => void = () => {};
export let saveBusy = false;
// Invoked when the dictation (microphone) toolbar button is clicked.
export let onToggleDictation: () => void = () => {};
// True while dictation is running; switches the button to its "stop" state.
export let dictationActive = false;
// True while a start/stop request is in flight; disables the button.
export let dictationBusy = false;
// True when dictation cannot be used at all; disables the button.
export let dictationUnavailable = false;
let headingMenuOpen = false;
let headingMenuEl: HTMLDivElement | null = null;
@ -262,6 +266,19 @@
</div>
<div class="toolbar-divider" aria-hidden="true"></div>
<div class="toolbar-group">
<button
type="button"
class="toolbar-btn toolbar-icon-btn"
class:is-active={dictationActive}
on:click={onToggleDictation}
disabled={dictationBusy || dictationUnavailable}
aria-label={dictationActive ? "Stop dictation" : "Start dictation"}
title={dictationActive ? "Stop dictation" : "Start dictation"}
>
<span class="material-symbols-outlined" aria-hidden="true"
>{dictationActive ? "mic_off" : "mic"}</span
>
</button>
<button
type="button"
class="toolbar-btn toolbar-icon-btn"

View File

@ -173,6 +173,11 @@ export async function invoke<T>(
}
case "shutdown":
return undefined as T;
case "speech_start":
case "speech_stop":
throw new Error(
"Speech dictation is available in the desktop app runtime only.",
);
default:
throw new Error(`Unsupported command in web runtime: ${command}`);
}

View File

@ -29,6 +29,7 @@ internal sealed record SpeechTranscribePayload(
string? Text = null,
int? SimulateDelayMs = null,
int? Simulate_Delay_Ms = null);
/// <summary>
/// Payload of a live speech poll request.
/// </summary>
/// <param name="MaxItems">
/// Optional upper bound on the number of transcript items to return;
/// <c>null</c> leaves the choice of a default to the command handler.
/// </param>
internal sealed record S2TPollPayload(int? MaxItems = null);
internal sealed record SearchEntriesPayload(
string? Query = null,
string? Section = null,

View File

@ -19,3 +19,19 @@ public sealed record SpeechTranscribeResultDto(
string Text,
string Engine,
string? Warning = null);
/// <summary>
/// Result of a request to start live speech-to-text.
/// </summary>
/// <param name="Running">Whether live speech-to-text is running after the request.</param>
/// <param name="Status">Service status string, e.g. <c>"listening"</c>, <c>"stopped"</c> or <c>"error"</c>.</param>
/// <param name="Warning">Optional diagnostic or error message; <c>null</c> when there is none.</param>
public sealed record S2TStartResultDto(
bool Running,
string Status,
string? Warning = null);
/// <summary>
/// Result of a request to stop live speech-to-text.
/// </summary>
/// <param name="Running">Whether live speech-to-text is still running after the request.</param>
/// <param name="Status">Service status string, e.g. <c>"stopped"</c>.</param>
/// <param name="Warning">Optional diagnostic or error message; <c>null</c> when there is none.</param>
public sealed record S2TStopResultDto(
bool Running,
string Status,
string? Warning = null);
/// <summary>
/// Result of polling live speech-to-text for transcript text.
/// </summary>
/// <param name="Items">Transcript items returned by this poll; empty when nothing is available.</param>
/// <param name="Running">Whether live speech-to-text is running at the time of the poll.</param>
/// <param name="Status">Service status string, e.g. <c>"listening"</c>, <c>"stopped"</c> or <c>"error"</c>.</param>
/// <param name="Warning">Optional diagnostic or error message; <c>null</c> when there is none.</param>
public sealed record S2TPollResultDto(
IReadOnlyList<string> Items,
bool Running,
string Status,
string? Warning = null);

View File

@ -25,6 +25,7 @@ public class Entry(
IJournalConfigService config,
IAiService ai,
ISpeechBridgeService speech,
IS2TService liveSpeech,
IEntryFileService entryFiles,
IListService lists,
ITodoService todos,
@ -38,6 +39,7 @@ public class Entry(
private readonly IJournalConfigService _config = config;
private readonly IAiService _ai = ai;
private readonly ISpeechBridgeService _speech = speech;
private readonly IS2TService _liveSpeech = liveSpeech;
private readonly IEntryFileService _entryFiles = entryFiles;
private readonly IListService _lists = lists;
private readonly ITodoService _todos = todos;
@ -325,6 +327,21 @@ public class Entry(
Text: text,
SimulateDelayMs: simulateDelayMs));
break;
case "speech.live.start":
result = await _liveSpeech.StartAsync();
break;
case "speech.live.stop":
result = await _liveSpeech.StopAsync();
break;
case "speech.live.poll":
var livePollPayload = DeserializePayload<S2TPollPayload>(cmd.Payload);
var maxItems = livePollPayload?.MaxItems ?? 8;
if (maxItems <= 0)
maxItems = 1;
if (maxItems > 64)
maxItems = 64;
result = await _liveSpeech.PollAsync(maxItems);
break;
case "vault.initialize":
var initPayload = DeserializePayload<VaultInitializePayload>(cmd.Payload);
if (initPayload is null || string.IsNullOrWhiteSpace(initPayload.Password) || string.IsNullOrWhiteSpace(initPayload.VaultDirectory))

View File

@ -58,6 +58,7 @@ public static class ServiceCollectionExtensions
message: $"Python speech sidecar unavailable: {ex.Message}");
}
});
services.AddSingleton<IS2TService, DisabledS2TService>();
services.AddSingleton<IEntryFileRepository, SqliteEntryFileRepository>();
services.AddSingleton<IEntryFileService, EntryFileService>();
services.AddSingleton<IListRepository, SqliteListRepository>();

View File

@ -0,0 +1,17 @@
using Journal.Core.Dtos;
namespace Journal.Core.Services.Speech;
/// <summary>
/// <see cref="IS2TService"/> implementation that never captures audio. Every
/// operation completes immediately and reports a not-running, "stopped" state
/// together with an explanatory message.
/// </summary>
/// <param name="message">
/// Message returned as the warning of every result. Blank values fall back to
/// "S2T is disabled."; other values are trimmed.
/// </param>
public sealed class DisabledS2TService(string message = "S2T is disabled.") : IS2TService
{
    private const string DefaultMessage = "S2T is disabled.";
    private const string StoppedStatus = "stopped";

    private readonly string _message = string.IsNullOrWhiteSpace(message) ? DefaultMessage : message.Trim();

    /// <summary>Reports that live speech-to-text was not started.</summary>
    public Task<S2TStartResultDto> StartAsync(CancellationToken cancellationToken = default)
    {
        var result = new S2TStartResultDto(Running: false, Status: StoppedStatus, Warning: _message);
        return Task.FromResult(result);
    }

    /// <summary>Reports that live speech-to-text is stopped.</summary>
    public Task<S2TStopResultDto> StopAsync(CancellationToken cancellationToken = default)
    {
        var result = new S2TStopResultDto(Running: false, Status: StoppedStatus, Warning: _message);
        return Task.FromResult(result);
    }

    /// <summary>Returns an empty transcript list with the stopped state.</summary>
    public Task<S2TPollResultDto> PollAsync(int maxItems = 8, CancellationToken cancellationToken = default)
    {
        var result = new S2TPollResultDto(Items: [], Running: false, Status: StoppedStatus, Warning: _message);
        return Task.FromResult(result);
    }
}

View File

@ -0,0 +1,10 @@
using Journal.Core.Dtos;
namespace Journal.Core.Services.Speech;
/// <summary>
/// Contract for a live speech-to-text service that is started, stopped, and
/// polled for transcript text.
/// </summary>
public interface IS2TService
{
/// <summary>Requests that live speech-to-text start.</summary>
/// <param name="cancellationToken">Token for cancelling the request.</param>
/// <returns>The running flag, status string and optional warning after the request.</returns>
Task<S2TStartResultDto> StartAsync(CancellationToken cancellationToken = default);
/// <summary>Requests that live speech-to-text stop.</summary>
/// <param name="cancellationToken">Token for cancelling the request.</param>
/// <returns>The running flag, status string and optional warning after the request.</returns>
Task<S2TStopResultDto> StopAsync(CancellationToken cancellationToken = default);
/// <summary>Retrieves transcript items that are currently available.</summary>
/// <param name="maxItems">Maximum number of transcript items to return.</param>
/// <param name="cancellationToken">Token for cancelling the request.</param>
/// <returns>The returned items together with the current running flag, status and optional warning.</returns>
Task<S2TPollResultDto> PollAsync(int maxItems = 8, CancellationToken cancellationToken = default);
}

View File

@ -1,12 +1,15 @@
using Microsoft.Extensions.DependencyInjection;
using Journal.Core;
using Journal.Core.Services.Speech;
using Journal.Core.Services.Sidecar;
using Journal.Sidecar;
// Use UTF-8 for both console streams.
Console.OutputEncoding = System.Text.Encoding.UTF8;
Console.InputEncoding = System.Text.Encoding.UTF8;
var services = new ServiceCollection();
services.AddFragmentServices();
// Registered after AddFragmentServices().
// NOTE(review): presumably meant to take precedence over any IS2TService
// registered there, so the Whisper-backed implementation is the one resolved — confirm.
services.AddSingleton<IS2TService, LocalWhisperS2TService>();
services.AddSingleton<Entry>();
var provider = services.BuildServiceProvider();

View File

@ -5,6 +5,8 @@
<TargetFramework>net10.0</TargetFramework>
<ImplicitUsings>enable</ImplicitUsings>
<Nullable>enable</Nullable>
<IncludeNativeLibrariesForSelfExtract>true</IncludeNativeLibrariesForSelfExtract>
<IncludeAllContentForSelfExtract>true</IncludeAllContentForSelfExtract>
</PropertyGroup>
<ItemGroup>
@ -13,6 +15,9 @@
<ItemGroup>
<PackageReference Include="Microsoft.Extensions.DependencyInjection" Version="10.0.3" />
<PackageReference Include="NAudio" Version="2.2.1" />
<PackageReference Include="Whisper.net" Version="1.9.0" />
<PackageReference Include="Whisper.net.Runtime" Version="1.9.0" />
</ItemGroup>
</Project>

View File

@ -0,0 +1,270 @@
using System.Collections.Concurrent;
using Journal.Core.Dtos;
using Journal.Core.Services.Speech;
using NAudio.Wave;
using Whisper.net;
using Whisper.net.Ggml;
namespace Journal.Sidecar;
/// <summary>
/// Live speech-to-text service that records from the default input device with
/// NAudio, cuts the audio into chunks on a timer, and transcribes each chunk
/// with a local Whisper model on a background worker. Transcribed text is
/// buffered until it is collected through <see cref="PollAsync"/>.
/// </summary>
public sealed class LocalWhisperS2TService : IS2TService, IDisposable
{
    private const int SampleRate = 16000;
    private const int Bits = 16;
    private const int Channels = 1;
    private const int ChunkMs = 2000;
    private const int MaxBufferedItems = 256;
    private const int MinPollItems = 1;
    private const int MaxPollItems = 64;
    private const int WorkerStopGraceMs = 1000;

    // Guards the session fields, _factory, _starting, _status and _warning.
    private readonly object _sync = new();

    // Guards _segmentBuffer only. When both locks are needed, _sync is taken first.
    private readonly object _segmentLock = new();

    private readonly ConcurrentQueue<string> _transcripts = new();

    private WaveInEvent? _waveIn;
    private System.Timers.Timer? _flushTimer;
    private MemoryStream? _segmentBuffer;
    private BlockingCollection<byte[]>? _chunkQueue;
    private CancellationTokenSource? _cts;
    private Task? _worker;
    private WhisperFactory? _factory;
    private volatile bool _running;
    private bool _starting;
    private string _status = "stopped";
    private string? _warning;

    /// <summary>Resources owned by one capture session, detached from the fields for teardown.</summary>
    private readonly record struct Session(
        WaveInEvent? WaveIn,
        System.Timers.Timer? FlushTimer,
        MemoryStream? Buffer,
        BlockingCollection<byte[]>? Queue,
        CancellationTokenSource? Cts,
        Task? Worker);

    /// <summary>
    /// Ensures the Whisper model is available, then starts microphone capture,
    /// the chunk flush timer and the transcription worker.
    /// </summary>
    /// <param name="cancellationToken">Cancels the model download, if one is needed.</param>
    /// <returns>
    /// A running/"listening" result on success; the current state when the
    /// service is already running or another start is in progress; or a
    /// not-running/"error" result carrying the exception message on failure.
    /// Failures are reported through the result rather than thrown.
    /// </returns>
    public async Task<S2TStartResultDto> StartAsync(CancellationToken cancellationToken = default)
    {
        lock (_sync)
        {
            if (_running)
                return new S2TStartResultDto(true, _status, _warning);

            // The lock is released while the model is ensured below, so a second
            // caller must not be allowed to build a parallel capture session.
            if (_starting)
                return new S2TStartResultDto(false, _status, _warning);

            _starting = true;
            _status = "starting";
            _warning = null;
        }

        try
        {
            var modelPath = await EnsureModelAsync(cancellationToken);

            lock (_sync)
            {
                var factory = _factory ??= WhisperFactory.FromPath(modelPath);
                var queue = new BlockingCollection<byte[]>();
                var cts = new CancellationTokenSource();
                _chunkQueue = queue;
                _cts = cts;

                lock (_segmentLock)
                {
                    _segmentBuffer = new MemoryStream();
                }

                var waveFormat = new WaveFormat(SampleRate, Bits, Channels);
                _waveIn = new WaveInEvent
                {
                    DeviceNumber = -1,
                    WaveFormat = waveFormat,
                    BufferMilliseconds = 100
                };
                _waveIn.DataAvailable += HandleDataAvailable;

                _flushTimer = new System.Timers.Timer(ChunkMs);
                _flushTimer.Elapsed += (_, _) => FlushChunk(waveFormat);

                // The token only prevents the worker from being scheduled if it is
                // already cancelled; the worker itself ends when the queue completes.
                _worker = Task.Run(() => RunWorkerAsync(queue, factory, waveFormat), cts.Token);

                _waveIn.StartRecording();
                _flushTimer.Start();

                _running = true;
                _status = "listening";
            }
        }
        catch (Exception ex)
        {
            Session failed;
            lock (_sync)
            {
                failed = DetachSession();
                _running = false;
                _status = "error";
                _warning = ex.Message;
            }

            // Tear down whatever was created before the failure so the device,
            // the timer and the worker (blocked on the queue) are not leaked.
            try
            {
                HaltCapture(failed);
            }
            catch (Exception)
            {
                // The start failure above is what gets reported to the caller.
            }
            finally
            {
                ReleaseSession(failed);
            }

            return new S2TStartResultDto(false, "error", ex.Message);
        }
        finally
        {
            lock (_sync)
            {
                _starting = false;
            }
        }

        return new S2TStartResultDto(true, "listening");
    }

    /// <summary>
    /// Stops capture, completes the chunk queue, and waits up to one second for
    /// the transcription worker to finish.
    /// </summary>
    /// <param name="cancellationToken">Cuts the wait for the worker short when cancelled.</param>
    /// <returns>A not-running/"stopped" result; includes the last warning when nothing was running.</returns>
    public async Task<S2TStopResultDto> StopAsync(CancellationToken cancellationToken = default)
    {
        Session session;
        lock (_sync)
        {
            if (!_running)
                return new S2TStopResultDto(false, "stopped", _warning);

            _running = false;
            _status = "stopped";
            session = DetachSession();
        }

        try
        {
            HaltCapture(session);
            if (session.Worker is not null)
            {
                await Task.WhenAny(session.Worker, Task.Delay(WorkerStopGraceMs, cancellationToken));
            }
        }
        finally
        {
            ReleaseSession(session);
        }

        return new S2TStopResultDto(false, "stopped");
    }

    /// <summary>
    /// Dequeues up to <paramref name="maxItems"/> buffered transcript items.
    /// </summary>
    /// <param name="maxItems">Requested item count; clamped to the range 1 to 64.</param>
    /// <param name="cancellationToken">Not used; the call completes synchronously.</param>
    /// <returns>The dequeued items with the current running flag, status and warning.</returns>
    public Task<S2TPollResultDto> PollAsync(int maxItems = 8, CancellationToken cancellationToken = default)
    {
        maxItems = Math.Clamp(maxItems, MinPollItems, MaxPollItems);

        var items = new List<string>(maxItems);
        while (items.Count < maxItems && _transcripts.TryDequeue(out var text))
        {
            items.Add(text);
        }

        lock (_sync)
        {
            return Task.FromResult(new S2TPollResultDto(items, _running, _status, _warning));
        }
    }

    /// <summary>
    /// Stops any running session and disposes the Whisper factory. Does not throw;
    /// a failure while stopping is recorded as the current warning.
    /// </summary>
    public void Dispose()
    {
        try
        {
            StopAsync().GetAwaiter().GetResult();
        }
        catch (Exception ex)
        {
            lock (_sync)
            {
                _warning = ex.Message;
            }
        }

        // NOTE(review): a worker that outlives the stop grace period may still be
        // using a processor created from this factory — confirm this is safe.
        WhisperFactory? factory;
        lock (_sync)
        {
            factory = _factory;
            _factory = null;
        }

        factory?.Dispose();
    }

    /// <summary>
    /// Moves the current session's resources out of the fields and clears the
    /// fields. The caller must hold <see cref="_sync"/>.
    /// </summary>
    private Session DetachSession()
    {
        MemoryStream? buffer;
        lock (_segmentLock)
        {
            buffer = _segmentBuffer;
            _segmentBuffer = null;
        }

        var session = new Session(_waveIn, _flushTimer, buffer, _chunkQueue, _cts, _worker);
        _waveIn = null;
        _flushTimer = null;
        _chunkQueue = null;
        _cts = null;
        _worker = null;
        return session;
    }

    /// <summary>Stops the timer and the capture device, then lets the worker run to completion.</summary>
    private void HaltCapture(Session session)
    {
        try
        {
            session.FlushTimer?.Stop();
            if (session.WaveIn is not null)
            {
                session.WaveIn.DataAvailable -= HandleDataAvailable;
                session.WaveIn.StopRecording();
                session.WaveIn.Dispose();
            }
        }
        finally
        {
            // Always complete the queue, even if the device failed to stop;
            // otherwise the worker would block on it forever.
            session.Queue?.CompleteAdding();
            session.Cts?.Cancel();
        }
    }

    /// <summary>Disposes the managed resources of a detached session.</summary>
    private static void ReleaseSession(Session session)
    {
        session.FlushTimer?.Dispose();
        session.Cts?.Dispose();
        session.Buffer?.Dispose();
    }

    /// <summary>Appends captured audio bytes to the current segment buffer, if there is one.</summary>
    private void HandleDataAvailable(object? sender, WaveInEventArgs e)
    {
        lock (_segmentLock)
        {
            _segmentBuffer?.Write(e.Buffer, 0, e.BytesRecorded);
        }
    }

    /// <summary>
    /// Timer callback: hands the buffered audio to the worker as one chunk and
    /// resets the buffer. Buffers holding less than half a second of audio are
    /// left to grow until the next tick.
    /// </summary>
    private void FlushChunk(WaveFormat waveFormat)
    {
        var queue = _chunkQueue;
        if (queue is null || queue.IsAddingCompleted)
            return;

        byte[] chunk;
        lock (_segmentLock)
        {
            if (_segmentBuffer is null)
                return;
            if (_segmentBuffer.Length < waveFormat.AverageBytesPerSecond / 2)
                return;

            chunk = _segmentBuffer.ToArray();
            _segmentBuffer.SetLength(0);
        }

        if (chunk.Length == 0)
            return;

        try
        {
            queue.Add(chunk);
        }
        catch (InvalidOperationException)
        {
            // The queue was completed between the check above and Add because a
            // stop raced with this timer tick; the chunk is dropped.
        }
    }

    /// <summary>
    /// Worker loop: wraps each PCM chunk in a WAV stream, transcribes it, and
    /// buffers every non-blank segment. Ends when the queue is completed.
    /// </summary>
    /// <param name="queue">Chunk queue of the session this worker belongs to.</param>
    /// <param name="factory">Factory used to build the Whisper processor.</param>
    /// <param name="waveFormat">Format of the raw PCM chunks.</param>
    private async Task RunWorkerAsync(BlockingCollection<byte[]> queue, WhisperFactory factory, WaveFormat waveFormat)
    {
        try
        {
            using var processor = factory.CreateBuilder()
                .WithLanguage("en")
                .Build();

            foreach (var pcmChunk in queue.GetConsumingEnumerable())
            {
                try
                {
                    using var pcmStream = new MemoryStream(pcmChunk, writable: false);
                    using var raw = new RawSourceWaveStream(pcmStream, waveFormat);
                    using var wavStream = new MemoryStream();
                    WaveFileWriter.WriteWavFileToStream(wavStream, raw);
                    wavStream.Position = 0;

                    await foreach (var result in processor.ProcessAsync(wavStream))
                    {
                        var text = result.Text?.Trim();
                        if (string.IsNullOrWhiteSpace(text))
                            continue;

                        EnqueueTranscript(text);
                    }
                }
                catch (Exception ex)
                {
                    // A bad chunk must not end the session; surface it as a warning.
                    lock (_sync)
                    {
                        _warning = $"Transcription error: {ex.Message}";
                    }
                }
            }
        }
        catch (Exception ex)
        {
            lock (_sync)
            {
                _status = "error";
                _warning = ex.Message;
            }
        }
    }

    /// <summary>
    /// Returns the path of the local "ggml-base.en.bin" model, downloading it first
    /// when it is missing. The download is limited to five minutes and is written
    /// to a temporary file that is moved into place only after it completes, so an
    /// interrupted download never leaves a truncated model at the final path.
    /// </summary>
    private static async Task<string> EnsureModelAsync(CancellationToken cancellationToken)
    {
        var modelDirectory = Path.Combine(
            Environment.GetFolderPath(Environment.SpecialFolder.LocalApplicationData),
            "ProjectJournal",
            "speech-models");
        Directory.CreateDirectory(modelDirectory);

        var modelPath = Path.Combine(modelDirectory, "ggml-base.en.bin");
        if (File.Exists(modelPath))
            return modelPath;

        using var cts = CancellationTokenSource.CreateLinkedTokenSource(cancellationToken);
        cts.CancelAfter(TimeSpan.FromMinutes(5));

        var tempPath = $"{modelPath}.{Guid.NewGuid():N}.tmp";
        try
        {
            using var modelStream = await WhisperGgmlDownloader.Default.GetGgmlModelAsync(
                GgmlType.BaseEn,
                cancellationToken: cts.Token);

            await using (var fileWriter = new FileStream(tempPath, FileMode.Create, FileAccess.Write, FileShare.None))
            {
                await modelStream.CopyToAsync(fileWriter, cts.Token);
            }

            File.Move(tempPath, modelPath, overwrite: true);
        }
        catch
        {
            TryDeleteFile(tempPath);
            throw;
        }

        return modelPath;
    }

    /// <summary>Deletes a file, ignoring I/O and access errors.</summary>
    private static void TryDeleteFile(string path)
    {
        try
        {
            File.Delete(path);
        }
        catch (IOException)
        {
            // Best effort: a leftover temporary file is harmless.
        }
        catch (UnauthorizedAccessException)
        {
            // Best effort: a leftover temporary file is harmless.
        }
    }

    /// <summary>Buffers a transcript item, discarding the oldest items beyond the buffer limit.</summary>
    private void EnqueueTranscript(string text)
    {
        _transcripts.Enqueue(text);
        while (_transcripts.Count > MaxBufferedItems && _transcripts.TryDequeue(out _))
        {
        }
    }
}