feat: initial work on MongoDB support

This commit is contained in:
2024-01-06 00:03:23 +07:00
parent 60039bbaa5
commit 30a7098bed
6 changed files with 687 additions and 39 deletions

View File

@ -6,10 +6,12 @@ edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
fern = "0.6.2"
humantime = "2.1.0"
leptess = "0.14.0"
log = "0.4.20"
rusty-tesseract = "1.1.9"
tracing = "0.1.40"
tracing-subscriber = { version = "0.3.18", features = ["env-filter"] }
[dependencies.mongodb]
version = "2.8.0"
features = ["tokio-runtime"]

View File

@ -0,0 +1,29 @@
use mongodb::Collection;
use crate::database;
use crate::structs::Card;
use std::sync::OnceLock;
static KATANA: OnceLock<Collection<Card>> = OnceLock::new();
///
/// Initialize the "katana" collection in MongoDB
///
/// Takes the database handle stored in `database::MONGO_DATABASE`, obtains the
/// typed `Collection<Card>` named "katana" from it and stores that handle in
/// the `KATANA` static.
///
/// This method is called automatically when you initialize the
/// database module.
///
/// # Panics
///
/// Panics if `database::MONGO_DATABASE` has not been set yet, or if `KATANA`
/// has already been set (for example when this function is called twice).
///
pub fn init() {
    // The database handle must exist before a collection can be derived from it.
    let db = database::MONGO_DATABASE
        .get()
        .expect("MongoDB database must be initialized before the katana collection");

    // `OnceLock::set` fails when a value is already stored.
    KATANA
        .set(db.collection::<Card>("katana"))
        .expect("katana collection is already initialized");
}
///
/// Query a card from the database.
///
/// Not implemented yet.
///
/// # Panics
///
/// Always panics through `todo!`.
///
fn query_card() {
    todo!("Query card from database");
}

View File

@ -0,0 +1,37 @@
pub mod katana;
use mongodb::options::ClientOptions;
use mongodb::{Client, Database};
use std::env;
use std::sync::OnceLock;
use tracing::info;
static MONGO_CLIENT: OnceLock<Client> = OnceLock::new();
static MONGO_DATABASE: OnceLock<Database> = OnceLock::new();
///
/// Connect to MongoDB and fill the module-level client and database handles.
///
/// Configuration is read from environment variables:
///
/// - `MONGODB_URL` (required): connection string passed to
///   `ClientOptions::parse`.
/// - `MONGODB_USERNAME` (optional): when it can be read, explicit credentials
///   are installed on the client options and `MONGODB_PASSWORD` becomes
///   required. Otherwise the options parsed from the url are used unchanged.
///
/// After the client is created, the handle for the "swordfish" database is
/// stored and `katana::init` is called.
///
/// # Panics
///
/// Panics if `MONGODB_URL` is missing or cannot be parsed, if a username is
/// given without `MONGODB_PASSWORD`, if the client cannot be constructed, or
/// if the client/database handles were already set by an earlier call.
///
async fn init() {
    let url = env::var("MONGODB_URL").expect("MongoDB url must be provided");
    let mut options = ClientOptions::parse(url)
        .await
        .expect("MongoDB url must be a valid connection string");

    match env::var("MONGODB_USERNAME") {
        Ok(username) => {
            let password =
                env::var("MONGODB_PASSWORD").expect("MongoDB password must be provided");
            options.credential = Some(
                mongodb::options::Credential::builder()
                    .username(username)
                    .password(password)
                    .build(),
            );
        }
        Err(_) => {
            info!("No MongoDB username provided, using authentication provided in the url");
        }
    }

    let client = Client::with_options(options).expect("failed to create the MongoDB client");
    let database = client.database("swordfish");

    MONGO_CLIENT
        .set(client)
        .expect("MongoDB client is already initialized");
    // The result of this `set` used to be dropped; a failure is now reported
    // the same way as for the client above.
    MONGO_DATABASE
        .set(database)
        .expect("MongoDB database is already initialized");

    // The katana collection reads MONGO_DATABASE, so it is initialized last.
    katana::init();
}

View File

@ -3,10 +3,11 @@ pub use log;
pub use tracing::{debug, error, info, trace, warn};
use tracing_subscriber::{self, fmt, EnvFilter};
pub mod constants;
pub mod database;
pub mod structs;
pub mod tesseract;
pub fn setup_logger(level: &str) -> Result<(), fern::InitError> {
pub fn setup_logger(level: &str) -> Result<(), ()> {
let formatter = fmt::format()
.with_level(true)
.with_target(true)

View File

@ -1,7 +1,8 @@
pub use leptess::{LepTess, Variable};
use std::{sync::{
Arc, Mutex, LazyLock
}, thread};
use std::{
sync::{Arc, LazyLock, Mutex},
thread,
};
static TESSERACT: LazyLock<Arc<Mutex<LepTess>>> = LazyLock::new(|| {
let mut lep_tess = match LepTess::new(None, "eng") {
@ -10,7 +11,9 @@ static TESSERACT: LazyLock<Arc<Mutex<LepTess>>> = LazyLock::new(|| {
};
// lep_tess.set_variable(Variable::TesseditPagesegMode, "6").unwrap();
// Use LSTM only.
lep_tess.set_variable(Variable::TesseditOcrEngineMode, "2").unwrap();
lep_tess
.set_variable(Variable::TesseditOcrEngineMode, "2")
.unwrap();
Arc::new(Mutex::new(lep_tess))
});
@ -18,10 +21,10 @@ static mut TESSERACT_VEC: Vec<Arc<Mutex<LepTess>>> = Vec::new();
///
/// Return a handle to the shared Tesseract instance held in `TESSERACT`.
///
/// The `numeric_only` argument is not used by this function.
///
/// Deprecated because it provides no performance benefit; if you really need
/// one, use `get_tesseract_from_vec` instead.
///
pub fn get_tesseract(numeric_only: bool) -> Arc<Mutex<LepTess>> {
    // Borrow the `Arc` stored in the static, then hand out another reference to it.
    let shared: &Arc<Mutex<LepTess>> = &TESSERACT;
    Arc::clone(shared)
}
@ -37,8 +40,7 @@ pub unsafe fn get_tesseract_from_vec(numeric_only: bool) -> Arc<Mutex<LepTess>>
});
}
lep_tess = Arc::new(Mutex::new(init_tesseract(numeric_only).unwrap()));
}
else {
} else {
lep_tess = TESSERACT_VEC.pop().unwrap();
thread::spawn(move || unsafe {
let ocr = init_tesseract(numeric_only).unwrap();
@ -53,9 +55,13 @@ pub fn init_tesseract(numeric_only: bool) -> Result<LepTess, String> {
Ok(lep_tess) => lep_tess,
Err(why) => return Err(format!("Failed to initialize Tesseract: {:?}", why)),
};
lep_tess.set_variable(Variable::TesseditPagesegMode, "6").unwrap();
lep_tess
.set_variable(Variable::TesseditPagesegMode, "6")
.unwrap();
// Use LSTM only.
lep_tess.set_variable(Variable::TesseditOcrEngineMode, "1").unwrap();
lep_tess
.set_variable(Variable::TesseditOcrEngineMode, "1")
.unwrap();
if numeric_only {
match lep_tess.set_variable(Variable::TesseditCharWhitelist, "0123456789") {
Ok(_) => (),