feat: swordfish-user

Also move the Tesseract implementation to swordfish, because only the bot uses it.
This commit is contained in:
2024-01-07 19:19:40 +07:00
parent ebafd93110
commit 8eb622f479
12 changed files with 291 additions and 22 deletions

View File

@ -6,9 +6,7 @@ edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
leptess = "0.14.0"
log = "0.4.20"
rusty-tesseract = "1.1.9"
serde = "1.0.195"
tokio = "1.35.1"
tracing = "0.1.40"

View File

@ -1,12 +1,12 @@
#![feature(lazy_cell)]
#![feature(string_remove_matches)]
pub use log;
pub use tokio;
pub use tracing::{debug, error, info, trace, warn};
use tracing_subscriber::{self, fmt, EnvFilter};
pub mod constants;
pub mod database;
pub mod structs;
pub mod tesseract;
pub mod utils;
pub fn setup_logger(level: &str) -> Result<(), ()> {

View File

@ -1,72 +0,0 @@
pub use leptess::{LepTess, Variable};
use std::{
sync::{Arc, LazyLock, Mutex},
thread,
};
/// Process-wide Tesseract instance, built lazily on first access and shared
/// behind an `Arc<Mutex<_>>`.
///
/// # Panics
///
/// The first access panics if the English model cannot be loaded or if the
/// OCR engine mode cannot be set.
static TESSERACT: LazyLock<Arc<Mutex<LepTess>>> = LazyLock::new(|| {
    let mut lep_tess = LepTess::new(None, "eng")
        .unwrap_or_else(|why| panic!("Failed to initialize Tesseract: {:?}", why));
    // The page segmentation mode is not overridden for this instance.
    //
    // NOTE(review): this setting used to be labelled "Use LSTM only", but
    // Tesseract documents engine mode 2 as "legacy + LSTM combined"; LSTM-only
    // is mode 1, which is what `init_tesseract` sets. Confirm which one is
    // intended before changing the value.
    lep_tess
        .set_variable(Variable::TesseditOcrEngineMode, "2")
        .expect("failed to set the Tesseract OCR engine mode");
    Arc::new(Mutex::new(lep_tess))
});
/// A lock-protected stack of ready-to-use Tesseract instances.
type TesseractPool = Mutex<Vec<Arc<Mutex<LepTess>>>>;

/// Spare instances created with `numeric_only == false`.
static TESSERACT_VEC: TesseractPool = Mutex::new(Vec::new());

/// Spare instances created with `numeric_only == true`.
///
/// Kept apart from [`TESSERACT_VEC`] so that a caller never receives an
/// instance configured for the other mode.
static TESSERACT_NUMERIC_VEC: TesseractPool = Mutex::new(Vec::new());

/// Number of spare instances built in the background when a pool is found empty.
const POOL_WARMUP: usize = 3;

///
/// Get a Tesseract instance.
///
/// Deprecated because it provides no performance benefit, if you really need
/// then use get_tesseract_from_vec.
///
/// `numeric_only` is accepted for signature compatibility but has no effect:
/// every call returns a handle to the same shared instance.
///
pub fn get_tesseract(numeric_only: bool) -> Arc<Mutex<LepTess>> {
    // The shared instance is configured once, so the flag cannot be applied here.
    let _ = numeric_only;
    TESSERACT.clone()
}

/// Selects the pool that stores instances for the given `numeric_only` mode.
fn tesseract_pool(numeric_only: bool) -> &'static TesseractPool {
    if numeric_only {
        &TESSERACT_NUMERIC_VEC
    } else {
        &TESSERACT_VEC
    }
}

/// Builds one instance on a background thread and adds it to the matching pool.
fn spawn_pool_refill(numeric_only: bool) {
    thread::spawn(move || match init_tesseract(numeric_only) {
        Ok(ocr) => {
            // A poisoned lock still guards a structurally valid Vec, so keep using it.
            tesseract_pool(numeric_only)
                .lock()
                .unwrap_or_else(|poisoned| poisoned.into_inner())
                .push(Arc::new(Mutex::new(ocr)));
        }
        // Refilling is best-effort: a failure only means the pool stays smaller.
        Err(why) => eprintln!("{}", why),
    });
}

/// Hands out a Tesseract instance configured for `numeric_only`.
///
/// If the matching pool holds a spare instance, it is returned and one
/// replacement is built on a background thread. If the pool is empty, three
/// spares are requested in the background while a fresh instance is built
/// synchronously for the caller.
///
/// # Safety
///
/// The pools are protected by a mutex, so this function has no preconditions.
/// It is still declared `unsafe` only so that existing call sites, which wrap
/// the call in an `unsafe` block, keep compiling unchanged.
///
/// # Panics
///
/// Panics if the pool is empty and the synchronous initialization fails.
pub unsafe fn get_tesseract_from_vec(numeric_only: bool) -> Arc<Mutex<LepTess>> {
    // The guard is a temporary, so the lock is released at the end of this statement.
    let pooled = tesseract_pool(numeric_only)
        .lock()
        .unwrap_or_else(|poisoned| poisoned.into_inner())
        .pop();

    match pooled {
        Some(instance) => {
            spawn_pool_refill(numeric_only);
            instance
        }
        None => {
            for _ in 0..POOL_WARMUP {
                spawn_pool_refill(numeric_only);
            }
            let ocr = init_tesseract(numeric_only).expect("Tesseract initialization failed");
            Arc::new(Mutex::new(ocr))
        }
    }
}
/// Creates a new English-language Tesseract instance.
///
/// The instance is configured with page segmentation mode `6` and OCR engine
/// mode `1` (LSTM only). When `numeric_only` is `true`, the recognizer's
/// character whitelist is restricted to the digits `0`-`9`.
///
/// # Errors
///
/// Returns a descriptive message if the engine cannot be created or if any of
/// the configuration variables is rejected.
pub fn init_tesseract(numeric_only: bool) -> Result<LepTess, String> {
    let mut lep_tess = LepTess::new(None, "eng")
        .map_err(|why| format!("Failed to initialize Tesseract: {:?}", why))?;

    lep_tess
        .set_variable(Variable::TesseditPagesegMode, "6")
        .map_err(|why| format!("Failed to set page segmentation mode: {:?}", why))?;

    // Use LSTM only.
    lep_tess
        .set_variable(Variable::TesseditOcrEngineMode, "1")
        .map_err(|why| format!("Failed to set OCR engine mode: {:?}", why))?;

    if numeric_only {
        lep_tess
            .set_variable(Variable::TesseditCharWhitelist, "0123456789")
            .map_err(|why| format!("Failed to set whitelist: {:?}", why))?;
    }

    Ok(lep_tess)
}

View File

@ -1,2 +0,0 @@
pub mod libtesseract;
pub mod subprocess;

View File

@ -1,33 +0,0 @@
pub use rusty_tesseract;
pub use rusty_tesseract::{Args, Image};
use std::{collections::HashMap, sync::LazyLock};
/// Lazily built arguments for general-purpose OCR: English language, page
/// segmentation mode 6, OCR engine mode 1, no DPI override and no extra
/// configuration variables.
static TESSERACT_ARGS: LazyLock<Args> = LazyLock::new(|| {
    let no_overrides = HashMap::new();
    Args {
        lang: "eng".to_string(),
        psm: Some(6),
        oem: Some(1),
        dpi: None,
        config_variables: no_overrides,
    }
});
/// Lazily built arguments for digit-only OCR: English language, page
/// segmentation mode 6, OCR engine mode 1, no DPI override, and the
/// `tessedit_char_whitelist` variable set to `0123456789`.
static TESSERACT_NUMERIC_ARGS: LazyLock<Args> = LazyLock::new(|| {
    let mut digits_only = HashMap::new();
    digits_only.insert("tessedit_char_whitelist".into(), "0123456789".into());
    Args {
        lang: "eng".to_string(),
        psm: Some(6),
        oem: Some(1),
        dpi: None,
        config_variables: digits_only,
    }
});
/// Runs OCR on `image` using the general-purpose arguments in `TESSERACT_ARGS`.
///
/// # Errors
///
/// Returns `"Failed to OCR image: ..."` carrying the debug representation of
/// the underlying error when recognition fails.
pub fn image_to_string(image: &Image) -> Result<String, String> {
    rusty_tesseract::image_to_string(image, &TESSERACT_ARGS)
        .map_err(|why| format!("Failed to OCR image: {:?}", why))
}
/// Runs OCR on `image` using the digit-only arguments in
/// `TESSERACT_NUMERIC_ARGS`.
///
/// # Errors
///
/// Returns `"Failed to OCR image: ..."` carrying the debug representation of
/// the underlying error when recognition fails.
pub fn image_to_numeric_string(image: &Image) -> Result<String, String> {
    rusty_tesseract::image_to_string(image, &TESSERACT_NUMERIC_ARGS)
        .map_err(|why| format!("Failed to OCR image: {:?}", why))
}