diff --git a/README.md b/README.md index 6bc06c5..d8fb2f8 100644 --- a/README.md +++ b/README.md @@ -67,7 +67,7 @@ cargo run -r --example yolo # blip, clip, yolop, svtr, db, ... ## Integrate into your own project -### 1. Add `usls` as a dependency to your project's `Cargo.toml` +### Add `usls` as a dependency to your project's `Cargo.toml` ```Shell cargo add usls diff --git a/assets/truck.jpg b/assets/truck.jpg new file mode 100644 index 0000000..6b98688 Binary files /dev/null and b/assets/truck.jpg differ diff --git a/examples/sam/main.rs b/examples/sam/main.rs new file mode 100644 index 0000000..ce3b377 --- /dev/null +++ b/examples/sam/main.rs @@ -0,0 +1,51 @@ +use usls::{ + models::{SamPrompt, SAM}, + Annotator, DataLoader, Options, +}; + +fn main() -> Result<(), Box> { + // encoder + let options_encoder = Options::default() + // .with_cpu() + .with_i00((1, 1, 1).into()) + .with_model("mobile-sam-vit-t-encoder.onnx")?; + + // decoder + let options_decoder = Options::default() + // .with_cpu() + .with_i11((1, 1, 1).into()) + .with_i21((1, 1, 1).into()) + .with_find_contours(true) // find contours or not + .with_model("mobile-sam-vit-t-decoder.onnx")?; + + // build model + let mut model = SAM::new(options_encoder, options_decoder)?; + + // build dataloader + let dl = DataLoader::default() + .with_batch(model.batch() as _) + .load("./assets/truck.jpg")?; + + // build annotator + let annotator = Annotator::default() + .without_bboxes_name(true) + .without_bboxes_conf(true) + .without_mbrs_name(true) + .without_mbrs_conf(true) + .with_saveout("SAM"); + + // run & annotate + for (xs, _paths) in dl { + // prompt + let prompts = vec![ + SamPrompt::default() + // .with_postive_point(774., 366.), // postive point + // .with_negative_point(774., 366.), // negative point + .with_bbox(215., 297., 643., 459.), // bbox + ]; + let ys = model.run(&xs, &prompts)?; + annotator.annotate(&xs, &ys); + } + + Ok(()) +} diff --git a/examples/yolo/main.rs b/examples/yolo/main.rs index 1879a1f..ebe9895 100644 --- a/examples/yolo/main.rs +++ b/examples/yolo/main.rs @@ -152,6 +152,7 @@ fn main() -> Result<()> { .with_confs(&[0.2, 0.15]) // class_0: 0.4, others: 0.15 // .with_names(&coco::NAMES_80) .with_names2(&coco::KEYPOINTS_NAMES_17) + .with_find_contours(false) // find contours or not .with_profile(args.profile); let mut model = YOLO::new(options)?; @@ -164,7 +165,7 @@ fn main() -> Result<()> { let annotator = Annotator::default() .with_skeletons(&coco::SKELETONS_16) .with_bboxes_thickness(4) - .without_masks(true) // No masks plotting. + .without_masks(false) // No masks plotting when doing segment task. .with_saveout("YOLO-Series"); // run & annotate diff --git a/src/core/annotator.rs b/src/core/annotator.rs index 42b6cf8..bbe67a4 100644 --- a/src/core/annotator.rs +++ b/src/core/annotator.rs @@ -340,13 +340,6 @@ impl Annotator { } } - // masks - if !self.without_masks { - if let Some(xs) = &y.masks() { - self.plot_masks(&mut img_rgba, xs); - } - } - // bboxes if !self.without_bboxes { if let Some(xs) = &y.bboxes() { @@ -368,6 +361,13 @@ impl Annotator { } } + // masks + if !self.without_masks { + if let Some(xs) = &y.masks() { + self.plot_masks(&mut img_rgba, xs); + } + } + // probs if let Some(xs) = &y.probs() { self.plot_probs(&mut img_rgba, xs); diff --git a/src/core/engine.rs b/src/core/engine.rs index 3f5c151..576344d 100644 --- a/src/core/engine.rs +++ b/src/core/engine.rs @@ -100,6 +100,30 @@ impl OrtEngine { (3, 3) => Self::_set_ixx(x, &config.i33, i, ii).unwrap_or(x_default), (3, 4) => Self::_set_ixx(x, &config.i34, i, ii).unwrap_or(x_default), (3, 5) => Self::_set_ixx(x, &config.i35, i, ii).unwrap_or(x_default), + (4, 0) => Self::_set_ixx(x, &config.i40, i, ii).unwrap_or(x_default), + (4, 1) => Self::_set_ixx(x, &config.i41, i, ii).unwrap_or(x_default), + (4, 2) => Self::_set_ixx(x, &config.i42, i, ii).unwrap_or(x_default), + (4, 3) => Self::_set_ixx(x, &config.i43, i, ii).unwrap_or(x_default), + (4, 4) => Self::_set_ixx(x, &config.i44, i, ii).unwrap_or(x_default), + (4, 5) => Self::_set_ixx(x, &config.i45, i, ii).unwrap_or(x_default), + (5, 0) => Self::_set_ixx(x, &config.i50, i, ii).unwrap_or(x_default), + (5, 1) => Self::_set_ixx(x, &config.i51, i, ii).unwrap_or(x_default), + (5, 2) => Self::_set_ixx(x, &config.i52, i, ii).unwrap_or(x_default), + (5, 3) => Self::_set_ixx(x, &config.i53, i, ii).unwrap_or(x_default), + (5, 4) => Self::_set_ixx(x, &config.i54, i, ii).unwrap_or(x_default), + (5, 5) => Self::_set_ixx(x, &config.i55, i, ii).unwrap_or(x_default), + (6, 0) => Self::_set_ixx(x, &config.i60, i, ii).unwrap_or(x_default), + (6, 1) => Self::_set_ixx(x, &config.i61, i, ii).unwrap_or(x_default), + (6, 2) => Self::_set_ixx(x, &config.i62, i, ii).unwrap_or(x_default), + (6, 3) => Self::_set_ixx(x, &config.i63, i, ii).unwrap_or(x_default), + (6, 4) => Self::_set_ixx(x, &config.i64_, i, ii).unwrap_or(x_default), + (6, 5) => Self::_set_ixx(x, &config.i65, i, ii).unwrap_or(x_default), + (7, 0) => Self::_set_ixx(x, &config.i70, i, ii).unwrap_or(x_default), + (7, 1) => Self::_set_ixx(x, &config.i71, i, ii).unwrap_or(x_default), + (7, 2) => Self::_set_ixx(x, &config.i72, i, ii).unwrap_or(x_default), + (7, 3) => Self::_set_ixx(x, &config.i73, i, ii).unwrap_or(x_default), + (7, 4) => Self::_set_ixx(x, &config.i74, i, ii).unwrap_or(x_default), + (7, 5) => Self::_set_ixx(x, &config.i75, i, ii).unwrap_or(x_default), _ => todo!(), }; v_.push(x); @@ -290,6 +314,12 @@ impl OrtEngine { TensorElementType::Int64 => { ort::Value::from_array(x.mapv(|x_| x_ as i64).view())?.into_dyn() } + TensorElementType::Uint8 => { + ort::Value::from_array(x.mapv(|x_| x_ as u8).view())?.into_dyn() + } + TensorElementType::Int8 => { + ort::Value::from_array(x.mapv(|x_| x_ as i8).view())?.into_dyn() + } _ => todo!(), }; xs_.push(Into::>::into(x_)); diff --git a/src/core/options.rs b/src/core/options.rs index cb6e866..d31232a 100644 --- a/src/core/options.rs +++ b/src/core/options.rs @@ -39,7 +39,30 @@ pub struct Options { pub i33: Option, pub i34: Option, pub i35: Option, - + pub i40: Option, + pub i41: Option, + pub i42: Option, + pub i43: Option, + pub i44: Option, + pub i45: Option, + pub i50: Option, + pub i51: Option, + pub i52: Option, + pub i53: Option, + pub i54: Option, + pub i55: Option, + pub i60: Option, + pub i61: Option, + pub i62: Option, + pub i63: Option, + pub i64_: Option, + pub i65: Option, + pub i70: Option, + pub i71: Option, + pub i72: Option, + pub i73: Option, + pub i74: Option, + pub i75: Option, // trt related pub trt_engine_cache_enable: bool, pub trt_int8_enable: bool, @@ -63,6 +86,7 @@ pub struct Options { pub yolo_task: Option, pub yolo_version: Option, pub yolo_preds: Option, + pub find_contours: bool, } impl Default for Options { @@ -96,6 +120,30 @@ impl Default for Options { i33: None, i34: None, i35: None, + i40: None, + i41: None, + i42: None, + i43: None, + i44: None, + i45: None, + i50: None, + i51: None, + i52: None, + i53: None, + i54: None, + i55: None, + i60: None, + i61: None, + i62: None, + i63: None, + i64_: None, + i65: None, + i70: None, + i71: None, + i72: None, + i73: None, + i74: None, + i75: None, trt_engine_cache_enable: true, trt_int8_enable: false, trt_fp16_enable: false, @@ -116,6 +164,7 @@ impl Default for Options { yolo_task: None, yolo_version: None, yolo_preds: None, + find_contours: false, } } } @@ -171,6 +220,11 @@ impl Options { self } + pub fn with_find_contours(mut self, x: bool) -> Self { + self.find_contours = x; + self + } + pub fn with_names(mut self, names: &[&str]) -> Self { self.names = Some(names.iter().map(|x| x.to_string()).collect::>()); self @@ -360,4 +414,124 @@ impl Options { self.i35 = Some(x); self } + + pub fn with_i40(mut self, x: MinOptMax) -> Self { + self.i40 = Some(x); + self + } + + pub fn with_i41(mut self, x: MinOptMax) -> Self { + self.i41 = Some(x); + self + } + + pub fn with_i42(mut self, x: MinOptMax) -> Self { + self.i42 = Some(x); + self + } + + pub fn with_i43(mut self, x: MinOptMax) -> Self { + self.i43 = Some(x); + self + } + + pub fn with_i44(mut self, x: MinOptMax) -> Self { + self.i44 = Some(x); + self + } + + pub fn with_i45(mut self, x: MinOptMax) -> Self { + self.i45 = Some(x); + self + } + + pub fn with_i50(mut self, x: MinOptMax) -> Self { + self.i50 = Some(x); + self + } + + pub fn with_i51(mut self, x: MinOptMax) -> Self { + self.i51 = Some(x); + self + } + + pub fn with_i52(mut self, x: MinOptMax) -> Self { + self.i52 = Some(x); + self + } + + pub fn with_i53(mut self, x: MinOptMax) -> Self { + self.i53 = Some(x); + self + } + + pub fn with_i54(mut self, x: MinOptMax) -> Self { + self.i54 = Some(x); + self + } + + pub fn with_i55(mut self, x: MinOptMax) -> Self { + self.i55 = Some(x); + self + } + + pub fn with_i60(mut self, x: MinOptMax) -> Self { + self.i60 = Some(x); + self + } + + pub fn with_i61(mut self, x: MinOptMax) -> Self { + self.i61 = Some(x); + self + } + + pub fn with_i62(mut self, x: MinOptMax) -> Self { + self.i62 = Some(x); + self + } + + pub fn with_i63(mut self, x: MinOptMax) -> Self { + self.i63 = Some(x); + self + } + + pub fn with_i64(mut self, x: MinOptMax) -> Self { + self.i64_ = Some(x); + self + } + + pub fn with_i65(mut self, x: MinOptMax) -> Self { + self.i65 = Some(x); + self + } + + pub fn with_i70(mut self, x: MinOptMax) -> Self { + self.i70 = Some(x); + self + } + + pub fn with_i71(mut self, x: MinOptMax) -> Self { + self.i71 = Some(x); + self + } + + pub fn with_i72(mut self, x: MinOptMax) -> Self { + self.i72 = Some(x); + self + } + + pub fn with_i73(mut self, x: MinOptMax) -> Self { + self.i73 = Some(x); + self + } + + pub fn with_i74(mut self, x: MinOptMax) -> Self { + self.i74 = Some(x); + self + } + + pub fn with_i75(mut self, x: MinOptMax) -> Self { + self.i75 = Some(x); + self + } } diff --git a/src/core/x.rs b/src/core/x.rs index b98706d..e6b39ba 100644 --- a/src/core/x.rs +++ b/src/core/x.rs @@ -14,6 +14,12 @@ impl From> for X { } } +impl From> for X { + fn from(x: Vec) -> Self { + Self(Array::from_vec(x).into_dyn().into_owned()) + } +} + impl std::ops::Deref for X { type Target = Array; diff --git a/src/models/db.rs b/src/models/db.rs index 2e14b2d..68351fa 100644 --- a/src/models/db.rs +++ b/src/models/db.rs @@ -119,31 +119,31 @@ impl DB { continue; } - let mask = Polygon::default().with_points_imageproc(&contour.points); - let delta = mask.area() * ratio.round() as f64 * self.unclip_ratio as f64 - / mask.perimeter(); + let polygon = Polygon::default().with_points_imageproc(&contour.points); + let delta = polygon.area() * ratio.round() as f64 * self.unclip_ratio as f64 + / polygon.perimeter(); // TODO: optimize - let mask = mask + let polygon = polygon .unclip(delta, image_width as f64, image_height as f64) .resample(50) // .simplify(6e-4) .convex_hull(); - if let Some(bbox) = mask.bbox() { + if let Some(bbox) = polygon.bbox() { if bbox.height() < self.min_height || bbox.width() < self.min_width { continue; } - let confidence = mask.area() as f32 / bbox.area(); + let confidence = polygon.area() as f32 / bbox.area(); if confidence < self.confs[0] { continue; } y_bbox.push(bbox.with_confidence(confidence).with_id(0)); - if let Some(mbr) = mask.mbr() { + if let Some(mbr) = polygon.mbr() { y_mbrs.push(mbr.with_confidence(confidence).with_id(0)); } - y_polygons.push(mask.with_id(0)); + y_polygons.push(polygon.with_id(0)); } else { continue; } diff --git a/src/models/mod.rs b/src/models/mod.rs index df8c9d9..c45e555 100644 --- a/src/models/mod.rs +++ b/src/models/mod.rs @@ -8,6 +8,7 @@ mod dinov2; mod modnet; mod rtdetr; mod rtmo; +mod sam; mod svtr; mod yolo; mod yolo_; @@ -21,10 +22,8 @@ pub use dinov2::Dinov2; pub use modnet::MODNet; pub use rtdetr::RTDETR; pub use rtmo::RTMO; +pub use sam::{SamPrompt, SAM}; pub use svtr::SVTR; pub use yolo::YOLO; pub use yolo_::*; -// { -// AnchorsPosition, BoxType, ClssType, KptsType, YOLOFormat, YOLOPreds, YOLOTask, YOLOVersion, -// }; pub use yolop::YOLOPv2; diff --git a/src/models/sam.rs b/src/models/sam.rs new file mode 100644 index 0000000..35a07e6 --- /dev/null +++ b/src/models/sam.rs @@ -0,0 +1,208 @@ +use anyhow::Result; +use image::DynamicImage; +use ndarray::{Array, Axis}; +use rand::prelude::*; + +use crate::{Bbox, DynConf, Mask, Mbr, MinOptMax, Ops, Options, OrtEngine, Polygon, X, Y}; + +#[derive(Debug, Default, Clone)] +pub struct SamPrompt { + points: Vec, + labels: Vec, +} + +impl SamPrompt { + pub fn with_postive_point(mut self, x: f32, y: f32) -> Self { + self.points.extend_from_slice(&[x, y]); + self.labels.push(1.); + self + } + + pub fn with_negative_point(mut self, x: f32, y: f32) -> Self { + self.points.extend_from_slice(&[x, y]); + self.labels.push(0.); + self + } + + pub fn with_bbox(mut self, x: f32, y: f32, x2: f32, y2: f32) -> Self { + self.points.extend_from_slice(&[x, y, x2, y2]); + self.labels.extend_from_slice(&[2., 3.]); + self + } + + pub fn point_coords(&self, r: f32) -> Result { + let point_coords = Array::from_shape_vec((1, self.num_points(), 2), self.points.clone())? + .into_dyn() + .into_owned(); + Ok(X::from(point_coords * r)) + } + + pub fn point_labels(&self) -> Result { + let point_labels = Array::from_shape_vec((1, self.num_points()), self.labels.clone())? + .into_dyn() + .into_owned(); + Ok(X::from(point_labels)) + } + + pub fn num_points(&self) -> usize { + self.points.len() / 2 + } +} + +#[derive(Debug)] +pub struct SAM { + encoder: OrtEngine, + decoder: OrtEngine, + height: MinOptMax, + width: MinOptMax, + batch: MinOptMax, + pub conf: DynConf, + find_contours: bool, +} + +impl SAM { + pub fn new(options_encoder: Options, options_decoder: Options) -> Result { + let mut encoder = OrtEngine::new(&options_encoder)?; + let mut decoder = OrtEngine::new(&options_decoder)?; + let (batch, height, width) = ( + encoder.inputs_minoptmax()[0][0].to_owned(), + encoder.inputs_minoptmax()[0][2].to_owned(), + encoder.inputs_minoptmax()[0][3].to_owned(), + ); + let conf = DynConf::new(&options_decoder.confs, 1); + + encoder.dry_run()?; + decoder.dry_run()?; + + Ok(Self { + encoder, + decoder, + batch, + height, + width, + conf, + find_contours: options_decoder.find_contours, + }) + } + + pub fn run(&mut self, xs: &[DynamicImage], prompts: &[SamPrompt]) -> Result> { + let ys = self.encode(xs)?; + self.decode(ys, xs, prompts) + } + + pub fn encode(&mut self, xs: &[DynamicImage]) -> Result> { + let xs_ = X::apply(&[ + Ops::Letterbox( + xs, + self.height() as u32, + self.width() as u32, + "Bilinear", + 0, + "auto", + false, + ), + Ops::Standardize(&[123.675, 116.28, 103.53], &[58.395, 57.12, 57.375], 3), + Ops::Nhwc2nchw, + ])?; + self.encoder.run(vec![xs_]) + } + + pub fn decode( + &mut self, + xs: Vec, + xs0: &[DynamicImage], + prompts: &[SamPrompt], + ) -> Result> { + let mut ys: Vec = Vec::new(); + for (idx, image_embedding) in xs[0].axis_iter(Axis(0)).enumerate() { + let image_width = xs0[idx].width() as f32; + let image_height = xs0[idx].height() as f32; + let ratio = + (self.width() as f32 / image_width).min(self.height() as f32 / image_height); + + let ys_ = self.decoder.run(vec![ + X::from(image_embedding.into_dyn().into_owned()).insert_axis(0)?, // image_embedding + prompts[idx].point_coords(ratio)?, // point_coords + prompts[idx].point_labels()?, // point_labels + X::zeros(&[1, 1, self.height_low_res() as _, self.width_low_res() as _]), // mask_input, + X::zeros(&[1]), // has_mask_input + X::from(vec![image_height, image_width]), // orig_im_size + ])?; + + let mut y_masks: Vec = Vec::new(); + let mut y_polygons: Vec = Vec::new(); + let mut y_bboxes: Vec = Vec::new(); + let mut y_mbrs: Vec = Vec::new(); + + for (mask, iou) in ys_[0].axis_iter(Axis(0)).zip(ys_[1].axis_iter(Axis(0))) { + if iou[0] < self.conf[0] { + continue; + } + let luma = mask + .map(|x| if *x > 0. { 255u8 } else { 0u8 }) + .into_raw_vec(); + let luma: image::ImageBuffer, Vec<_>> = + match image::ImageBuffer::from_raw(image_width as _, image_height as _, luma) { + None => continue, + Some(x) => x, + }; + + // contours + let mut rng = thread_rng(); + let id = rng.gen_range(0..=255); + if self.find_contours { + let contours: Vec> = + imageproc::contours::find_contours_with_threshold(&luma, 0); + for c in contours.iter() { + let polygon = Polygon::default().with_points_imageproc(&c.points); + if let Some(bbox) = polygon.bbox() { + y_bboxes.push(bbox.with_confidence(iou[0]).with_id(id)); + }; + if let Some(mbr) = polygon.mbr() { + y_mbrs.push(mbr.with_confidence(iou[0]).with_id(id)); + } + y_polygons.push(polygon.with_confidence(iou[0]).with_id(id)); + } + } + y_masks.push(Mask::default().with_mask(luma).with_id(id)); + } + + let mut y = Y::default(); + if !y_masks.is_empty() { + y = y.with_masks(&y_masks); + } + if !y_polygons.is_empty() { + y = y.with_polygons(&y_polygons); + } + if !y_bboxes.is_empty() { + y = y.with_bboxes(&y_bboxes); + } + if !y_mbrs.is_empty() { + y = y.with_mbrs(&y_mbrs); + } + ys.push(y); + } + + Ok(ys) + } + + pub fn width_low_res(&self) -> usize { + self.width() as usize / 4 + } + + pub fn height_low_res(&self) -> usize { + self.height() as usize / 4 + } + + pub fn batch(&self) -> isize { + self.batch.opt + } + + pub fn width(&self) -> isize { + self.width.opt + } + + pub fn height(&self) -> isize { + self.height.opt + } +} diff --git a/src/models/yolo.rs b/src/models/yolo.rs index d44a7c0..56d5554 100644 --- a/src/models/yolo.rs +++ b/src/models/yolo.rs @@ -24,6 +24,7 @@ pub struct YOLO { names_kpt: Option>, task: YOLOTask, layout: YOLOPreds, + find_contours: bool, version: Option, } @@ -153,6 +154,7 @@ impl Vision for YOLO { names_kpt, layout, version, + find_contours: options.find_contours, }) } @@ -417,7 +419,6 @@ impl Vision for YOLO { .into_par_iter() .filter_map(|bbox| { let coefs = coefs.slice(s![bbox.id_born(), ..]).to_vec(); - let proto = protos.as_ref()?.slice(s![idx, .., .., ..]); let (nm, mh, mw) = proto.dim(); @@ -461,10 +462,9 @@ impl Vision for YOLO { } // Find contours - let contours: Vec> = - imageproc::contours::find_contours_with_threshold(&mask, 0); - - Some(( + let polygons = if self.find_contours { + let contours: Vec> = + imageproc::contours::find_contours_with_threshold(&mask, 0); contours .into_par_iter() .map(|x| { @@ -473,7 +473,13 @@ impl Vision for YOLO { .with_points_imageproc(&x.points) .with_name(bbox.name().cloned()) }) - .max_by(|x, y| x.area().total_cmp(&y.area()))?, + .max_by(|x, y| x.area().total_cmp(&y.area()))? + } else { + Polygon::default() + }; + + Some(( + polygons, Mask::default() .with_mask(mask) .with_id(bbox.id()) @@ -482,7 +488,12 @@ impl Vision for YOLO { }) .collect::<(Vec<_>, Vec<_>)>(); - y = y.with_polygons(&y_polygons).with_masks(&y_masks); + if !y_polygons.is_empty() { + y = y.with_polygons(&y_polygons); + } + if !y_masks.is_empty() { + y = y.with_masks(&y_masks); + } } } diff --git a/src/ys/polygon.rs b/src/ys/polygon.rs index 2f8fef4..f4df61d 100644 --- a/src/ys/polygon.rs +++ b/src/ys/polygon.rs @@ -64,6 +64,11 @@ impl Polygon { self } + pub fn with_confidence(mut self, x: f32) -> Self { + self.confidence = x; + self + } + pub fn id(&self) -> isize { self.id }