fix(tesseract/regex): support unicode characters

Mostly from the first part of regexify_text
This commit is contained in:
2024-02-10 11:17:56 +07:00
parent 5ae36d7f2a
commit c72d2cf16b
5 changed files with 26 additions and 8 deletions

9
Cargo.lock generated
View File

@@ -935,7 +935,7 @@ dependencies = [
"httpdate",
"itoa",
"pin-project-lite",
"socket2 0.4.10",
"socket2 0.5.5",
"tokio",
"tower-service",
"tracing",
@@ -2226,6 +2226,7 @@ dependencies = [
"swordfish-common",
"tokio",
"toml",
"unicode-segmentation",
]
[[package]]
@@ -2762,6 +2763,12 @@ dependencies = [
"tinyvec",
]
[[package]]
name = "unicode-segmentation"
version = "1.11.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d4c87d22b6e3f4a18d4d40ef354e97c90fcb14dd91d7dc0aa9d8a1172ebf7202"
[[package]]
name = "untrusted"
version = "0.9.0"