feat(katana): proper name & series reading

This commit is contained in:
2024-01-05 00:40:57 +07:00
parent 65911d3706
commit d1cee333ed
5 changed files with 245 additions and 104 deletions

View File

@ -1,57 +1,194 @@
use image::imageops::colorops::contrast_in_place;
use image::io::Reader as ImageReader;
use image::ImageFormat;
use once_cell::sync::Lazy;
use regex::Regex;
use serenity::model::channel::Message;
use serenity::prelude::*;
use std::io::Cursor;
use std::sync::{Arc, Mutex};
use std::thread;
use std::{env, thread};
use swordfish_common::structs::Card;
use swordfish_common::tesseract;
use swordfish_common::{debug, error, info, trace, warn};
use swordfish_common::{trace, warn};
// Character-class patterns, each compiled once on first use via `Lazy`.
// None of them is anchored, so `is_match` succeeds as soon as the haystack
// contains at least one character from the class.

// ASCII letters and the space character.
// NOTE(review): not referenced anywhere in the visible part of this file.
static TEXT_REGEX: Lazy<Regex> = Lazy::new(|| {
Regex::new(r"[A-Za-z ]").unwrap()
});
// ASCII letters and digits.
static TEXT_NUM_REGEX: Lazy<Regex> = Lazy::new(|| Regex::new(r"[A-Za-z0-9]").unwrap());
// Extra characters tolerated in OCR output besides letters and digits.
// NOTE(review): inside a character class `'-:` is a *range* (U+0027..=U+003A),
// so this also accepts `( ) * + , . /` and the digits, not only `'`, `-`, `:`
// and space. Presumably unintended, but `.` surviving may be relied upon
// elsewhere - confirm before tightening the class (e.g. to `[':\- ]`).
static ALLOWED_CHARS_REGEX: Lazy<Regex> = Lazy::new(|| Regex::new(r"['-: ]").unwrap());
pub struct Card {
wishlist: Option<i32>,
name: String,
series: String,
print: i32,
/// Replaces the first occurrence of `from` in `text` with `to`, in place.
///
/// Returns `true` when a replacement was made and `false` when `from` does
/// not occur in `text`.
///
/// An empty `from` is treated as "not found". `str::find("")` always yields
/// `Some(0)`, so without this guard the function would keep prepending `to`
/// and report success every time, turning any `while replace_string(..)` loop
/// into an infinite loop.
fn replace_string(text: &mut String, from: &str, to: &str) -> bool {
    if from.is_empty() {
        return false;
    }
    match text.find(from) {
        Some(start) => {
            // `find` returns a byte offset on a char boundary and `from.len()`
            // is the byte length of the match, so the range is always valid.
            text.replace_range(start..start + from.len(), to);
            true
        }
        None => false,
    }
}
pub fn analyze_card(card: image::DynamicImage) {
/// Cleans up raw Tesseract output so it can be used as a card name or series.
///
/// The steps run in a fixed order because later ones depend on earlier ones:
///
/// 1. drop the trailing newline and a stray trailing `"\nN"`;
/// 2. normalise em dashes to `-`;
/// 3. drop a leading `"- "` / `"-."`;
/// 4. apply known misread fixes (`"IR"` -> `"Ik"`, stray `"A\n"`);
/// 5. remove `-` before and `.` after interior line breaks;
/// 6. turn line breaks into spaces and drop every character that is neither
///    matched by `TEXT_NUM_REGEX` nor by `ALLOWED_CHARS_REGEX`;
/// 7. fix misread roman numerals (`"mn"` suffix, `"1ll"`) -> `"III"`;
/// 8. collapse runs of spaces and strip leading spaces.
///
/// The string is modified in place.
fn fix_tesseract_string(text: &mut String) {
    // Remove the trailing "\n" Tesseract appends.
    trace!("Text: {}", text);
    if text.ends_with('\n') {
        text.pop();
    }
    // Workaround for a stray trailing "\nN",
    // e.g. "We Never Learn\nN" -> "We Never Learn"
    trace!("Text: {}", text);
    if text.ends_with("\nN") {
        text.truncate(text.len() - 2);
    }
    // Normalise em dashes first: every later step only knows about '-', and
    // handling the multi-byte dash here keeps the prefix check below simple.
    // The dash is written as an escape so it cannot get lost in transit.
    if text.contains('\u{2014}') {
        trace!("Replacing '—' with '-'");
        *text = text.replace('\u{2014}', "-");
    }
    // Workaround for a stray leading "- " or "-."
    trace!("Text: {}", text);
    if text.starts_with("- ") || text.starts_with("-.") {
        // Both prefixes are two ASCII bytes, so the range is on a char boundary.
        text.replace_range(..2, "");
    }
    // Workaround IR -> Ik
    // Maybe it only occurs if Ik is in the start of the string?
    // e.g. "IReda" -> "Ikeda"
    trace!("Text: {}", text);
    replace_string(text, "IR", "Ik");
    // Workaround for "A\n"
    // This is usually the corner of the card
    trace!("Text: {}", text);
    replace_string(text, "A\n", "");
    // Workaround for "-\n" and "\n." (and others in the future)
    *text = strip_line_break_artifacts(text);
    // Replace every "\n" with " ". Replacing only the first one would let the
    // filter below delete the remaining newlines and glue words together.
    trace!("Text: {}", text);
    if text.contains('\n') {
        *text = text.replace('\n', " ");
    }
    // Remove every character that is neither alphanumeric nor explicitly allowed.
    trace!("Text: {}", text);
    text.retain(|c| {
        // Encode into a stack buffer instead of allocating a String per char.
        let mut buf = [0u8; 4];
        let single: &str = c.encode_utf8(&mut buf);
        TEXT_NUM_REGEX.is_match(single) || ALLOWED_CHARS_REGEX.is_match(single)
    });
    // Fix "mn" -> "III"
    trace!("Text: {}", text);
    if text.ends_with("mn") {
        text.truncate(text.len() - 2);
        text.push_str("III");
    }
    // Fix "1ll" -> "III"
    trace!("Text: {}", text);
    replace_string(text, "1ll", "III");
    // Replace multiple spaces with one space. The pattern must be TWO spaces:
    // replacing a single space with itself always "succeeds" and never ends.
    trace!("Text: {}", text);
    while replace_string(text, "  ", " ") {
        trace!("Removing multiple spaces");
    }
    // Workaround if the text starts with spaces
    trace!("Text: {}", text);
    let leading_spaces = text.len() - text.trim_start_matches(' ').len();
    if leading_spaces > 0 {
        trace!("Removing leading space");
        text.replace_range(..leading_spaces, "");
    }
    trace!("Text (final): {}", text);
}

/// Removes OCR artefacts around interior line breaks: a `-` directly before a
/// newline and a `.` directly after one.
///
/// A newline with nothing after it is copied unchanged. The text is rebuilt
/// char by char, so no byte-offset arithmetic is involved and a newline at the
/// very start or next to non-ASCII characters is handled safely.
fn strip_line_break_artifacts(text: &str) -> String {
    let mut cleaned = String::with_capacity(text.len());
    let mut chars = text.chars().peekable();
    while let Some(current) = chars.next() {
        if current != '\n' {
            cleaned.push(current);
            continue;
        }
        match chars.peek().copied() {
            Some(next) => {
                if cleaned.ends_with('-') {
                    cleaned.pop();
                }
                cleaned.push('\n');
                if next == '.' {
                    // Skip the '.' that follows the line break.
                    chars.next();
                }
            }
            None => cleaned.push('\n'),
        }
    }
    cleaned
}
/// Writes `img` to `path` as a debugging aid, but only when the `LOG_LEVEL`
/// environment variable is set to exactly `trace`.
///
/// Saving is best-effort: a failure is logged as a warning and otherwise
/// ignored. When `LOG_LEVEL` is unset, unreadable, or has any other value the
/// function does nothing.
fn save_image_if_trace(img: &image::DynamicImage, path: &str) {
    let trace_enabled = env::var("LOG_LEVEL").map_or(false, |level| level == "trace");
    if !trace_enabled {
        return;
    }
    if let Err(why) = img.save(path) {
        warn!("{}", format!("Failed to save image: {:?}", why));
        return;
    }
    trace!("Saved image to {}", path);
}
/// Runs OCR on one cropped card image and returns a `Card` holding the
/// recognised name and series.
///
/// The name and series regions are cropped from `card`, encoded as PNG into
/// an in-memory buffer and handed to Tesseract, each on its own thread with
/// its own `tesseract::init_tesseract(false)` handle. Both results are cleaned
/// with `fix_tesseract_string`. `count` is only used to label the debug image
/// file names. `wishlist` and `print` are placeholders (`None` / `0`) until
/// the TODOs below are implemented.
///
/// # Panics
///
/// Panics if Tesseract cannot be initialised, PNG encoding fails, Tesseract
/// rejects the image or fails to produce text, or a worker thread panics.
///
/// NOTE(review): this span appears to show removed and added lines of the
/// commit next to each other (two `crop_imm` calls per thread, two
/// `trace!("Series: ...")` lines). The `204 - 22` / PNG-buffer variants look
/// like the added ones - confirm against the real file.
pub fn analyze_card(card: image::DynamicImage, count: u32) -> Card {
trace!("Spawning threads for analyzing card...");
// Read the name and the series
// Each thread gets its own copy of the image because the closure is `move`.
let card_clone = card.clone();
let name_thread = thread::spawn(move || {
let mut leptess = tesseract::init_tesseract(false).expect("Failed to initialize Tesseract");
let name_img = card_clone.crop_imm(22, 26, 202 - 22, 70 - 26);
name_img.save("debug/4-name.png").unwrap();
leptess.set_image_from_mem(&name_img.as_bytes()).unwrap();
leptess.get_utf8_text().expect("Failed to read name")
// let binding = tesseract::init_tesseract_quick(false);
// let mut leptess = binding.lock().unwrap();
// Name region: x = 22, y = 26, width = 204 - 22, height = 70 - 26.
let name_img = card_clone.crop_imm(22, 26, 204 - 22, 70 - 26);
// Encode the crop as PNG in memory; those bytes are what Tesseract receives.
let mut buffer: Cursor<Vec<u8>> = Cursor::new(Vec::new());
match name_img.write_to(&mut buffer, ImageFormat::Png) {
Ok(_) => {}
Err(why) => {
panic!("{}", format!("Failed to write image: {:?}", why));
}
};
save_image_if_trace(&name_img, format!("debug/4-{}-name.png", count).as_str());
leptess.set_image_from_mem(&buffer.get_mut()).unwrap();
let mut name_str = leptess.get_utf8_text().expect("Failed to read name");
fix_tesseract_string(&mut name_str);
name_str
});
let card_clone = card.clone();
let series_thread = thread::spawn(move || {
let mut leptess = tesseract::init_tesseract(false).expect("Failed to initialize Tesseract");
let series_img = card_clone.crop_imm(22, 276, 202 - 22, 330 - 276);
series_img.save("debug/4-series.png").unwrap();
leptess.set_image_from_mem(&series_img.as_bytes()).unwrap();
let series = leptess.get_utf8_text().unwrap();
// let binding = tesseract::init_tesseract_quick(false);
// let mut leptess = binding.lock().unwrap();
// Series region: x = 22, y = 276, width = 204 - 22, height = 330 - 276.
let series_img = card_clone.crop_imm(22, 276, 204 - 22, 330 - 276);
let mut buffer: Cursor<Vec<u8>> = Cursor::new(Vec::new());
match series_img.write_to(&mut buffer, ImageFormat::Png) {
Ok(_) => {}
Err(why) => {
panic!("{}", format!("Failed to write image: {:?}", why));
}
};
save_image_if_trace(&series_img, format!("debug/4-{}-series.png", count).as_str());
leptess.set_image_from_mem(&buffer.get_mut()).unwrap();
// NOTE(review): this is the series thread, yet the panic message says
// "Failed to read name" - presumably copied from the name thread.
let mut series_str = leptess.get_utf8_text().expect("Failed to read name");
fix_tesseract_string(&mut series_str);
series_str
});
// Wait for both OCR threads; `unwrap` re-raises a panic from either of them.
let name = name_thread.join().unwrap();
trace!("Name: {}", name);
let series = series_thread.join().unwrap();
trace!("Series: {}", name);
// Read the print number
trace!("Series: {}", series);
// TODO: Read the print number
// TODO: Read the wishlist number (from our database)
return Card {
wishlist: None,
name,
series,
print: 0,
};
}
pub async fn analyze_drop_message(
leptess_arc: &Arc<Mutex<tesseract::LepTess>>,
message: &Message,
) -> Result<(), String> {
pub async fn analyze_drop_message(message: &Message) -> Result<Vec<Card>, String> {
if message.attachments.len() < 1 {
return Err("No attachments found".to_string());
};
@ -62,49 +199,39 @@ pub async fn analyze_drop_message(
Err(why) => return Err(format!("Failed to download attachment: {:?}", why)),
};
// Pre-process the image
let mut img = match ImageReader::new(Cursor::new(image_bytes)).with_guessed_format() {
Ok(reader) => match reader.decode() {
let mut img =
match ImageReader::with_format(Cursor::new(image_bytes), ImageFormat::Png).decode() {
Ok(img) => img,
Err(why) => return Err(format!("Failed to decode image: {:?}", why)),
},
Err(why) => return Err(format!("Failed to read image: {:?}", why)),
};
};
trace!("Grayscaling image...");
img = img.grayscale();
img.save("debug/1-grayscale.png").unwrap();
save_image_if_trace(&img, "debug/1-grayscale.png");
trace!("Increasing contrast of the image...");
img = img.adjust_contrast(1.0);
img.save("debug/2-contrast.png").unwrap();
contrast_in_place(&mut img, 127.0);
save_image_if_trace(&img, "debug/2-contrast.png");
// Cropping cards
let distance = 257 - 29 + 305 - 259;
let cards_count = img.width() / distance;
trace!("Cropping {} cards...", cards_count);
let mut jobs: Vec<_> = Vec::new();
for i_real in 0..cards_count {
let i = i_real.clone();
let leptess_mutex = leptess_arc.clone();
let img = img.clone();
let mut cards: Vec<Card> = Vec::with_capacity(cards_count.try_into().unwrap());
for index in 0..cards_count {
let i = index.clone();
let x = 29 + distance * i;
let y = 34;
let width = 257 + distance * i - x;
let height = 387 - y;
trace!("Cropping card {} ({}, {}, {}, {})", i, x, y, width, height);
let card_img = img.crop_imm(x, y, width, height);
save_image_if_trace(&card_img, &format!("debug/3-cropped-{}.png", i));
let job = move || {
Ok({
let x = 29 + distance * i;
let y = 34;
let width = 257 + distance * i - x;
let height = 387 - y;
trace!("Cropping card {} ({}, {}, {}, {})", i, x, y, width, height);
let card_img = img.crop_imm(x, y, width, height);
match card_img.save(format!("debug/3-cropped-{}.png", i)) {
Ok(_) => {
trace!("Saved cropped card {}", i);
let leptess = leptess_mutex.lock().unwrap();
analyze_card(card_img);
}
Err(why) => return Err(format!("Failed to save image: {:?}", why)),
};
})
trace!("Analyzing card {}", i);
Ok((i, analyze_card(card_img, i)))
};
jobs.push(job);
}
let mut tasks: Vec<thread::JoinHandle<Result<(), String>>> = Vec::new();
let mut tasks: Vec<thread::JoinHandle<Result<(u32, Card), String>>> = Vec::new();
for job in jobs {
let task = thread::spawn(job);
tasks.push(task);
@ -112,15 +239,17 @@ pub async fn analyze_drop_message(
for task in tasks {
let result = task.join();
match result {
Ok(_) => (),
Err(why) => return Err(format!("Failed to crop card: {:?}", why)),
Ok(result) => {
match result {
Ok((i, card)) => {
trace!("Finished analyzing card {}", i);
cards.push(card);
}
Err(why) => return Err(format!("Failed to analyze card: {}", why)),
};
}
Err(why) => return Err(format!("Failed to analyze card: {:?}", why)),
};
}
let leptess_mutex = leptess_arc.clone();
let mut leptess = leptess_mutex.lock().unwrap();
match leptess.set_image_from_mem(&img.as_bytes()) {
Ok(_) => (),
Err(why) => return Err(format!("Failed to set image: {:?}", why)),
};
Ok(())
Ok(cards)
}