add optimizer + rename bin

This commit is contained in:
rmanach 2025-10-24 16:22:18 +02:00
parent fff0d1b0dd
commit 02d9d60686
8 changed files with 570 additions and 25 deletions

1
.gitignore vendored
View File

@ -1 +1,2 @@
/target
data

285
Cargo.lock generated
View File

@ -76,12 +76,24 @@ version = "1.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8"
[[package]]
name = "bitflags"
version = "2.9.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2261d10cca569e4643e526d8dc2e62e433cc8aba21ab764233731f8d369bf394"
[[package]]
name = "bumpalo"
version = "3.19.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "46c5e41b57b8bba42a04676d81cb89e9ee8e859a1a66f80a5a72e1cb76b34d43"
[[package]]
name = "bytes"
version = "1.10.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d71b6127be86fdcfddb610f7182ac57211d4b18a3e9c82eb2d17662f2227ad6a"
[[package]]
name = "cc"
version = "1.2.41"
@ -157,12 +169,31 @@ version = "1.0.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b05b61dc5112cbb17e4b6cd61790d9845d13888356391624cbe7e41efeac1e75"
[[package]]
name = "console"
version = "0.16.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b430743a6eb14e9764d4260d4c0d8123087d504eeb9c48f2b2a5e810dd369df4"
dependencies = [
"encode_unicode",
"libc",
"once_cell",
"unicode-width",
"windows-sys 0.61.2",
]
[[package]]
name = "core-foundation-sys"
version = "0.8.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "773648b94d0e5d620f64f280777445740e61fe701025087ec8b57f45c791888b"
[[package]]
name = "encode_unicode"
version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "34aa73646ffb006b8f5147f3dc182bd4bcb190227ce861fc4a4844bf8e3cb2c0"
[[package]]
name = "env_filter"
version = "0.1.4"
@ -192,6 +223,49 @@ version = "0.1.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "52051878f80a721bb68ebfbc930e07b65ba72f2da88968ea5c06fd6ca3d3a127"
[[package]]
name = "futures-core"
version = "0.3.31"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "05f29059c0c2090612e8d742178b0580d2dc940c837851ad723096f87af6663e"
[[package]]
name = "futures-macro"
version = "0.3.31"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "162ee34ebcb7c64a8abebc059ce0fee27c2262618d7b60ed8faf72fef13c3650"
dependencies = [
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "futures-sink"
version = "0.3.31"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e575fab7d1e0dcb8d0c7bcf9a63ee213816ab51902e6d244a95819acacf1d4f7"
[[package]]
name = "futures-task"
version = "0.3.31"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f90f7dce0722e95104fcb095585910c0977252f286e354b5e3bd38902cd99988"
[[package]]
name = "futures-util"
version = "0.3.31"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9fa08315bb612088cc391249efdc3bc77536f16c91f6cf495e6fbe85b20a4a81"
dependencies = [
"futures-core",
"futures-macro",
"futures-task",
"pin-project-lite",
"pin-utils",
"slab",
]
[[package]]
name = "heck"
version = "0.5.0"
@ -222,6 +296,19 @@ dependencies = [
"cc",
]
[[package]]
name = "indicatif"
version = "0.18.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e2e0ddd45fe8e09ee1a607920b12271f8a5528a41ecaf6e1d1440d6493315b6b"
dependencies = [
"console",
"portable-atomic",
"unicode-width",
"unit-prefix",
"web-time",
]
[[package]]
name = "is_terminal_polyfill"
version = "1.70.2"
@ -268,6 +355,15 @@ version = "0.2.177"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2874a2af47a2325c2001a6e6fad9b16a53b802102b528163885171cf92b15976"
[[package]]
name = "lock_api"
version = "0.4.14"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "224399e74b87b5f3557511d98dff8b14089b3dadafcab6bb93eab67d3aace965"
dependencies = [
"scopeguard",
]
[[package]]
name = "log"
version = "0.4.28"
@ -296,6 +392,17 @@ dependencies = [
"unicase",
]
[[package]]
name = "mio"
version = "1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "69d83b0086dc8ecf3ce9ae2874b2d1290252e2a30720bea58a5c6639b0092873"
dependencies = [
"libc",
"wasi",
"windows-sys 0.61.2",
]
[[package]]
name = "num-traits"
version = "0.2.19"
@ -317,6 +424,56 @@ version = "1.70.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "384b8ab6d37215f3c5301a95a4accb5d64aa607f1fcb26a11b5303878451b4fe"
[[package]]
name = "optimg"
version = "0.1.0"
dependencies = [
"chrono",
"clap",
"env_logger",
"indicatif",
"log",
"mime_guess",
"tokio",
"tokio-util",
"walkdir",
]
[[package]]
name = "parking_lot"
version = "0.12.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "93857453250e3077bd71ff98b6a65ea6621a19bb0f559a85248955ac12c45a1a"
dependencies = [
"lock_api",
"parking_lot_core",
]
[[package]]
name = "parking_lot_core"
version = "0.9.12"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2621685985a2ebf1c516881c026032ac7deafcda1a2c9b7850dc81e3dfcb64c1"
dependencies = [
"cfg-if",
"libc",
"redox_syscall",
"smallvec",
"windows-link",
]
[[package]]
name = "pin-project-lite"
version = "0.2.16"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3b3cff922bd51709b605d9ead9aa71031d81447142d828eb4a6eba76fe619f9b"
[[package]]
name = "pin-utils"
version = "0.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184"
[[package]]
name = "portable-atomic"
version = "1.11.1"
@ -350,6 +507,15 @@ dependencies = [
"proc-macro2",
]
[[package]]
name = "redox_syscall"
version = "0.5.18"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ed2bf2547551a7053d6fdfafda3f938979645c44812fbfcda098faae3f1a362d"
dependencies = [
"bitflags",
]
[[package]]
name = "regex"
version = "1.12.2"
@ -379,18 +545,6 @@ version = "0.8.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7a2d987857b319362043e95f5353c0535c1f58eec5336fdfcf626430af7def58"
[[package]]
name = "rs-optimg"
version = "0.1.0"
dependencies = [
"chrono",
"clap",
"env_logger",
"log",
"mime_guess",
"walkdir",
]
[[package]]
name = "rustversion"
version = "1.0.22"
@ -406,6 +560,12 @@ dependencies = [
"winapi-util",
]
[[package]]
name = "scopeguard"
version = "1.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49"
[[package]]
name = "serde"
version = "1.0.228"
@ -441,6 +601,37 @@ version = "1.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64"
[[package]]
name = "signal-hook-registry"
version = "1.4.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b2a4719bff48cee6b39d12c020eeb490953ad2443b7055bd0b21fca26bd8c28b"
dependencies = [
"libc",
]
[[package]]
name = "slab"
version = "0.4.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7a2ae44ef20feb57a68b23d846850f861394c2e02dc425a50098ae8c90267589"
[[package]]
name = "smallvec"
version = "1.15.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "67b1b7a3b5fe4f1376887184045fcf45c69e92af734b7aaddc05fb777b6fbd03"
[[package]]
name = "socket2"
version = "0.6.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "17129e116933cf371d018bb80ae557e889637989d8638274fb25622827b03881"
dependencies = [
"libc",
"windows-sys 0.60.2",
]
[[package]]
name = "strsim"
version = "0.11.1"
@ -458,6 +649,48 @@ dependencies = [
"unicode-ident",
]
[[package]]
name = "tokio"
version = "1.48.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ff360e02eab121e0bc37a2d3b4d4dc622e6eda3a8e5253d5435ecf5bd4c68408"
dependencies = [
"bytes",
"libc",
"mio",
"parking_lot",
"pin-project-lite",
"signal-hook-registry",
"socket2",
"tokio-macros",
"windows-sys 0.61.2",
]
[[package]]
name = "tokio-macros"
version = "2.6.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "af407857209536a95c8e56f8231ef2c2e2aff839b22e07a1ffcbc617e9db9fa5"
dependencies = [
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "tokio-util"
version = "0.7.16"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "14307c986784f72ef81c89db7d9e28d6ac26d16213b109ea501696195e6e3ce5"
dependencies = [
"bytes",
"futures-core",
"futures-sink",
"futures-util",
"pin-project-lite",
"tokio",
]
[[package]]
name = "unicase"
version = "2.8.1"
@ -470,6 +703,18 @@ version = "1.0.20"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "462eeb75aeb73aea900253ce739c8e18a67423fadf006037cd3ff27e82748a06"
[[package]]
name = "unicode-width"
version = "0.2.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b4ac048d71ede7ee76d585517add45da530660ef4390e49b098733c6e897f254"
[[package]]
name = "unit-prefix"
version = "0.5.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "323402cff2dd658f39ca17c789b502021b3f18707c91cdf22e3838e1b4023817"
[[package]]
name = "utf8parse"
version = "0.2.2"
@ -486,6 +731,12 @@ dependencies = [
"winapi-util",
]
[[package]]
name = "wasi"
version = "0.11.1+wasi-snapshot-preview1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ccf3ec651a847eb01de73ccad15eb7d99f80485de043efb2f370cd654f4ea44b"
[[package]]
name = "wasm-bindgen"
version = "0.2.104"
@ -545,6 +796,16 @@ dependencies = [
"unicode-ident",
]
[[package]]
name = "web-time"
version = "1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5a6580f308b1fad9207618087a65c04e7a10bc77e02c8e84e9b00dd4b12fa0bb"
dependencies = [
"js-sys",
"wasm-bindgen",
]
[[package]]
name = "winapi-util"
version = "0.1.11"

View File

@ -1,5 +1,5 @@
[package]
name = "rs-optimg"
name = "optimg"
version = "0.1.0"
edition = "2024"
@ -13,6 +13,9 @@ lto = true
chrono = "0.4.42"
clap = { version = "4.5.50", features = ["derive"] }
env_logger = "0.11.8"
indicatif = "0.18.1"
log = "0.4.28"
mime_guess = "2.0.5"
tokio = { version = "1.48.0", features = ["full", "process", "rt-multi-thread", "sync", "signal"] }
tokio-util = { version = "0.7.16", features = ["rt"] }
walkdir = "2.5.0"

View File

@ -1,6 +1,6 @@
ROOT_DIR := $(dir $(realpath $(lastword $(MAKEFILE_LIST))))
PROG_NAME := rs-optimg
PROG_NAME := optimg
VERSION := $(shell cargo info $(PROG_NAME) | grep "^version" | cut -d ' ' -f2 | xargs)
LOG_LEVEL := debug

View File

@ -127,7 +127,7 @@ impl FileGroup {
}
pub fn add(&mut self, file: File) {
if self.files.get(&file.path).is_none() {
if !self.files.contains_key(&file.path) {
self.files.insert(file.path.clone(), file.clone());
self.nb_files += 1;
self.size += file.size;
@ -138,9 +138,13 @@ impl FileGroup {
self.nb_files
}
/// Returns `true` when the group's file counter (`nb_files`) is zero.
pub fn is_empty(&self) -> bool {
self.nb_files == 0
}
pub fn join(&mut self, right: &FileGroup) {
for (filepath, file) in &right.files {
if self.files.get(filepath).is_none() {
if !self.files.contains_key(filepath) {
self.files.insert(filepath.clone(), file.clone());
self.size += file.size;
self.nb_files += 1;
@ -237,6 +241,10 @@ impl Directory {
self.nb_files
}
/// Returns `true` when the directory's file counter (`nb_files`) is zero.
pub fn is_empty(&self) -> bool {
self.nb_files == 0
}
pub fn show(&self) {
let mut data = vec![format!("directory ({}) details:", self.path)];
@ -271,8 +279,8 @@ impl Directory {
pub fn get_file_group(
&self,
mimetype: Option<&FileImgMimetype>,
size_range: Option<&FileSizeRange>,
mimetype: Option<FileImgMimetype>,
size_range: Option<FileSizeRange>,
) -> FileGroup {
let mut file_group = FileGroup::new();
@ -289,14 +297,14 @@ impl Directory {
}
(None, Some(size_range)) => {
for dict_file_range in self.details.values() {
if let Some(fg) = dict_file_range.get(size_range) {
if let Some(fg) = dict_file_range.get(&size_range) {
file_group.join(fg);
}
}
}
(Some(mime), Some(size_range)) => {
if let Some(dict_file_range) = self.details.get(mime.value())
&& let Some(fg) = dict_file_range.get(size_range)
&& let Some(fg) = dict_file_range.get(&size_range)
{
file_group.join(fg);
}

View File

@ -1,3 +1,5 @@
mod file;
mod optimizer;
pub use file::Directory;
pub use file::{Directory, File, FileGroup, FileImgMimetype, FileSizeRange};
pub use optimizer::ImgOptimizer;

View File

@ -1,20 +1,46 @@
use clap::Parser;
use env_logger::Env;
use rs_optimg::Directory;
use optimg::{Directory, FileImgMimetype, FileSizeRange, ImgOptimizer};
const DEFAULT_DEST_DIR: &str = "data";
// TODO(rmanach): provide args to select file size and mimetype
// TODO(rmanach): provide arg to show directory details (.show())
// Command-line arguments of the binary, parsed with clap's derive API.
// NOTE: the `///` comments on the fields are consumed by clap as the help
// text of each flag, so they are part of the program output.
#[derive(Parser)]
#[command(version, about, long_about = None)]
struct Args {
/// Directory to scan
#[clap(long, required = true)]
src: String,
/// Number of workers
#[clap(long, default_value_t = 5)]
workers: usize,
/// Destination directory to write optimized files
// Falls back to `DEFAULT_DEST_DIR` when `--dest` is not given.
#[clap(long, default_value_t = DEFAULT_DEST_DIR.to_string())]
dest: String,
}
fn main() -> Result<(), Box<dyn std::error::Error>> {
#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
env_logger::Builder::from_env(Env::default().default_filter_or("info")).init();
let args = Args::parse();
let dir = Directory::from_path(&args.src)?;
dir.show();
let directory = Directory::from_path(&args.src)?;
let optimizer = ImgOptimizer::new(&args.dest, args.workers);
let file_group =
directory.get_file_group(Some(FileImgMimetype::Jpeg), Some(FileSizeRange::Tiny));
let result = optimizer.optimize(&file_group).await?;
let (optimized, percent, size) = result.stats();
log::info!(
"optimized: {}, Percent saved: {:.2}%, Size saved: {:.2} Mb",
optimized,
percent,
size
);
Ok(())
}

244
src/optimizer.rs Normal file
View File

@ -0,0 +1,244 @@
use indicatif::{ProgressBar, ProgressStyle};
use std::collections::VecDeque;
use std::sync::Arc;
use std::sync::atomic::{AtomicUsize, Ordering};
use tokio::process::Command;
use tokio::sync::{Mutex, RwLock};
use tokio_util::{sync::CancellationToken, task::TaskTracker};
use super::{File, FileGroup, FileImgMimetype};
/// Optimize the `file` with `jpegoptim` and put the result in
/// `dest_dir` directory keeping file path.
// TODO(rmanach): add argument to set the size or leave it empty for loseless optim
async fn jpeg_optim(dest_dir: &String, file: &File) -> Result<(File, Option<File>), String> {
let filepath = file.path.split("/").collect::<Vec<_>>();
let filepath = filepath[..filepath.len() - 1].join("/");
let filepath = filepath.trim_start_matches("..");
// replace all spaces in dir name for readability
let dest_dir = std::path::Path::new(&dest_dir)
.join(filepath.trim_start_matches("/"))
.to_string_lossy()
.replace(" ", "_");
std::fs::create_dir_all(&dest_dir)
.map_err(|e| format!("unable to create dest dir: {}, err: {}", dest_dir, e))?;
// # use "-S <i>k" to set maximum size in kilobytes
let cmd = format!(
"jpegoptim -s -p -q -S 1024k '{}' -d {}",
file.path, dest_dir
);
log::debug!("optimization launched for file: {} -> {}", file.path, cmd);
let output = Command::new("sh")
.arg("-c")
.arg(&cmd)
.output()
.await
.map_err(|e| format!("error while running command: {}, err: {}", cmd, e))?;
if !output.status.success() {
log::error!(
"error while running command: {}, err: {:?}",
cmd,
String::from_utf8_lossy(&output.stderr)
);
return Ok((file.clone(), None));
}
match File::from_directory(&dest_dir, &file.name) {
Ok(file_optim) => Ok((file.clone(), Some(file_optim))),
Err(e) => {
log::debug!(
"unable to get file: {} after optimization: {}",
file.path,
e
);
Ok((file.clone(), None))
}
}
}
/// Entry point of a `file` optimization: dispatches to the handler matching
/// the file mimetype.
///
/// Files whose mimetype has no handler are returned untouched as
/// `(original, None)`.
async fn optim(dest_dir: &String, file: &File) -> Result<(File, Option<File>), String> {
    // Guard clause: anything that is not a JPEG is passed through as-is.
    if file.mimetype != FileImgMimetype::Jpeg.value() {
        return Ok((file.clone(), None));
    }

    jpeg_optim(dest_dir, file).await
}
/// FIFO queue guarded by an async mutex, so it can be filled and emptied
/// from several tasks through a shared reference.
#[derive(Debug)]
pub struct Queue<T> {
    items: Arc<Mutex<VecDeque<T>>>,
}

impl<T: Send> Default for Queue<T> {
    /// Same as [`Queue::new`]: an empty queue.
    fn default() -> Self {
        Self::new()
    }
}

impl<T: Send> Queue<T> {
    /// Creates an empty queue.
    pub fn new() -> Self {
        Queue {
            items: Arc::new(Mutex::new(VecDeque::new())),
        }
    }

    /// Appends `item` at the back of the queue.
    pub async fn enqueue(&self, item: T) {
        let mut items = self.items.lock().await;
        items.push_back(item);
    }

    /// Removes and returns the item at the front of the queue,
    /// or `None` when the queue is empty.
    pub async fn pop(&self) -> Option<T> {
        let mut items = self.items.lock().await;
        items.pop_front()
    }
}
/// Optimization result.
/// Holds the original `FileGroup` and the `FileGroup` collected during the run.
#[derive(Debug, Clone)]
pub struct OptimizerResult {
/// File group that was submitted for optimization.
pub orig: FileGroup,
/// Files collected by the run: the optimized output for files that were
/// optimized, the original entry for files left unoptimized.
pub opti: FileGroup,
/// Number of files for which an optimized output was produced.
pub optimized: usize,
}
impl OptimizerResult {
    /// Summarizes the run as `(optimized, percent, size)`:
    /// the number of optimized files, the share of the original size that was
    /// saved (in percent, `0.0` when the original size is not positive) and
    /// the absolute size saved (original size minus resulting size).
    pub fn stats(&self) -> (usize, f64, f64) {
        let before = self.orig.get_size();
        let after = self.opti.get_size();

        let percent = (before > 0.0)
            .then(|| (1.0 - (after / before)) * 100.0)
            .unwrap_or(0.0);

        (self.optimized, percent, before - after)
    }
}
/// Wraps the optimization of JPEG and PNG files
/// using `jpegoptim` and `optipng` on process pool.
///
/// NOTE(review): only the JPEG (`jpegoptim`) handler is visible in this
/// file; PNG / `optipng` support does not appear to be wired yet.
///
/// Example:
/// ```ignore
/// let optimizer = ImgOptimizer::new("dest_dir", 5);
/// let result = optimizer.optimize(&file_group).await?;
/// let (optimized, percent, size) = result.stats();
/// ```
#[derive(Debug, Clone)]
pub struct ImgOptimizer {
/// Root directory under which optimized files are written.
dest_dir: String,
/// Number of worker tasks spawned by `optimize`.
nb_workers: usize,
/// Files waiting to be optimized, shared with the worker tasks.
queue: Arc<Queue<File>>,
/// Cancelled by `stop` to make the workers exit their loop.
cancel_token: CancellationToken,
/// Tracks the spawned worker tasks so `optimize` can wait for them.
tracker: TaskTracker,
}
impl ImgOptimizer {
pub fn new(dest_dir: &str, nb_workers: usize) -> Self {
ImgOptimizer {
dest_dir: dest_dir.to_string(),
nb_workers,
queue: Arc::new(Queue::new()),
cancel_token: CancellationToken::new(),
tracker: TaskTracker::new(),
}
}
pub async fn stop(&self) {
log::warn!("stopping optimizer...");
self.cancel_token.cancel();
}
pub async fn optimize(
&self,
file_group: &FileGroup,
) -> Result<OptimizerResult, Box<dyn std::error::Error>> {
let start = std::time::Instant::now();
let results = Arc::new(RwLock::new(FileGroup::new()));
let optimized = Arc::new(AtomicUsize::new(0));
for file in file_group.get_files() {
self.queue.enqueue(file).await;
}
let pb = Arc::new(ProgressBar::new(file_group.len() as u64));
pb.set_style(
ProgressStyle::default_bar()
.template("{msg} [{bar:40}] {pos}/{len} ({eta})")?
.progress_chars("##-"),
);
pb.set_message("optimizing...");
for _ in 0..self.nb_workers {
let queue = Arc::clone(&self.queue);
let results = Arc::clone(&results);
let optimized = Arc::clone(&optimized);
let pb = Arc::clone(&pb);
let cancel_token = self.cancel_token.clone();
let dest_dir = self.dest_dir.clone();
self.tracker.spawn(async move {
loop {
tokio::select! {
res = queue.pop() => {
if res.is_none() {
break;
}
let file = res.unwrap();
match optim(&dest_dir, &file).await {
Ok((orig, None)) => {
log::debug!("no optimization for file: {}", orig.path);
results.write().await.add(orig);
}
Ok((orig, Some(opti))) => {
log::debug!("optimization for file: {} -> {:.2}%", orig.path, (1.0 - (opti.size / orig.size)) * 100.0);
results.write().await.add(opti);
optimized.fetch_add(1, Ordering::Relaxed);
}
Err(e) => log::error!("optimization error for file: {}, err: {}", file.path, e),
}
pb.inc(1);
}
_ = cancel_token.cancelled() => {
break;
}
}
}
});
}
// TODO(rmanach): move it on main not here
tokio::spawn({
let pb = Arc::clone(&pb);
let optimizer = self.clone();
async move {
if let Err(e) = tokio::signal::ctrl_c().await {
log::error!("unexpected error while handling SIGINT interrupt, err: {e}");
}
log::warn!("interrupt signal received");
optimizer.stop().await;
pb.finish_with_message("optimization interrupted");
}
});
self.tracker.close();
self.tracker.wait().await;
pb.finish_with_message("optimization complete");
log::info!(
"optimization finished in {:.2}s",
start.elapsed().as_secs_f64()
);
Ok(OptimizerResult {
orig: file_group.clone(),
opti: results.read().await.clone(),
optimized: optimized.load(Ordering::Relaxed),
})
}
}