From 85c90b043679039f2897b7144806eb1ef4687a5c Mon Sep 17 00:00:00 2001
From: Adoo <Adoo@outlook.com>
Date: Mon, 13 May 2024 18:43:57 +0800
Subject: [PATCH 1/6] =?UTF-8?q?fix(gpu):=20=F0=9F=90=9B=20Retrieve=20the?=
 =?UTF-8?q?=20texture=20limit=20size=20from=20the=20GPU=20instead=20of=20u?=
 =?UTF-8?q?sing=20a=20hardcoded=20value?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 CHANGELOG.md                        |  4 ++
 core/src/builtin_widgets.rs         |  1 -
 gpu/src/gpu_backend.rs              |  2 +-
 gpu/src/gpu_backend/atlas.rs        | 80 ++++++++++++++++++++---------
 gpu/src/gpu_backend/textures_mgr.rs | 26 +++++++---
 gpu/src/lib.rs                      |  2 +
 gpu/src/wgpu_impl.rs                |  9 +++-
 7 files changed, 90 insertions(+), 34 deletions(-)
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 53e9884bf..5414cbf6c 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -31,6 +31,10 @@ Please only add new entries below the [Unreleased](#unreleased---releasedate) he
 - **core**: Introduced `StateWatcher` for watching state modifies, which was previously the responsibility of `StateReader`. This results in a cleaner and more compact `StateReader` implementation. (#556, @M-Adoo)
 - **gpu**: Introduced `GPUBackendImpl::max_textures_per_draw` to set a limit on textures per draw phase (#562 @M-Adoo)
 
+### Fixed
+
+- **gpu**: Retrieve the texture limit size from the GPU instead of using a hardcoded value. (#pr, @M-Adoo)
+
 ### Changed
 
 - **macros**: polish the compile error message of invalid filed in `@$var {}` (#556 @M-Adoo)
diff --git a/core/src/builtin_widgets.rs b/core/src/builtin_widgets.rs
index 39956672c..66832dbe5 100644
--- a/core/src/builtin_widgets.rs
+++ b/core/src/builtin_widgets.rs
@@ -171,7 +171,6 @@ impl<T> FatObj<T> {
 
   /// Maps an `FatObj<T>` to `FatObj<V>` by applying a function to the host
   /// object.
-  #[inline]
   #[track_caller]
   pub fn map<V>(self, f: impl FnOnce(T) -> V) -> FatObj<V> {
     FatObj {
diff --git a/gpu/src/gpu_backend.rs b/gpu/src/gpu_backend.rs
index 92c7fc159..9aadcb847 100644
--- a/gpu/src/gpu_backend.rs
+++ b/gpu/src/gpu_backend.rs
@@ -376,7 +376,7 @@ where
     let prefer_cache_size = prefer_cache_size(&path.path, &path.transform);
 
     let (mask, mask_to_view) =
-      if valid_cache_item(&prefer_cache_size) || view.contains_rect(&paint_bounds) {
+      if self.tex_mgr.is_good_for_cache(prefer_cache_size) || view.contains_rect(&paint_bounds) {
         self
           .tex_mgr
           .store_alpha_path(path.path, &path.transform, &mut self.gpu_impl)
diff --git a/gpu/src/gpu_backend/atlas.rs b/gpu/src/gpu_backend/atlas.rs
index 2a54c94d4..d9d7051fe 100644
--- a/gpu/src/gpu_backend/atlas.rs
+++ b/gpu/src/gpu_backend/atlas.rs
@@ -9,10 +9,6 @@ use slab::Slab;
 use super::Texture;
 use crate::GPUBackendImpl;
 
-pub const ATLAS_MAX_ITEM: DeviceSize = DeviceSize::new(512, 512);
-pub const ATLAS_MIN_SIZE: DeviceSize = DeviceSize::new(1024, 1024);
-pub const ATLAS_MAX_SIZE: DeviceSize = DeviceSize::new(4096, 4096);
-
 #[derive(Copy, Clone, Debug, PartialEq)]
 enum AtlasDist {
   Atlas(Allocation),
@@ -25,10 +21,16 @@ pub struct AtlasHandle<Attr> {
   atlas_dist: AtlasDist,
 }
 
+pub(crate) struct AtlasConfig {
+  label: &'static str,
+  min_size: DeviceSize,
+  max_size: DeviceSize,
+}
+
 pub(crate) struct Atlas<T: Texture, K, Attr> {
+  config: AtlasConfig,
   atlas_allocator: AtlasAllocator,
   texture: T,
-  label: &'static str,
   cache: FrameCache<K, AtlasHandle<Attr>>,
   extras: Slab<T>,
   islands: Vec<AtlasHandle<Attr>>,
@@ -53,13 +55,14 @@ where
   K: Hash + Eq,
 {
   pub fn new(
-    label: &'static str, format: ColorFormat, anti_aliasing: AntiAliasing, gpu_impl: &mut T::Host,
+    config: AtlasConfig, format: ColorFormat, anti_aliasing: AntiAliasing, gpu_impl: &mut T::Host,
   ) -> Self {
-    let texture = gpu_impl.new_texture(ATLAS_MIN_SIZE, anti_aliasing, format);
+    let min_size = config.min_size;
+    let texture = gpu_impl.new_texture(min_size, anti_aliasing, format);
     Self {
-      label,
+      config,
       texture,
-      atlas_allocator: AtlasAllocator::new(ATLAS_MIN_SIZE.cast_unit()),
+      atlas_allocator: AtlasAllocator::new(min_size.cast_unit()),
       cache: FrameCache::new(),
       extras: Slab::default(),
       islands: vec![],
@@ -92,7 +95,7 @@ where
     if alloc.is_none() {
       let expand_size = (current_size * 2)
         .max(current_size)
-        .min(ATLAS_MAX_SIZE);
+        .min(self.config.max_size);
       if expand_size != self.texture.size() {
         self.atlas_allocator.grow(expand_size.cast_unit());
         let mut new_tex = gpu_impl.new_texture(
@@ -161,10 +164,15 @@ where
     self.extras.clear();
   }
 
+  pub fn is_good_size_to_alloc(&self, size: DeviceSize) -> bool {
+    (!size.greater_than(self.config.max_size).any())
+      && size.area() <= self.config.max_size.area() / 4
+  }
+
   pub(crate) fn end_frame(&mut self) {
     self
       .cache
-      .end_frame(self.label)
+      .end_frame(&self.config.label)
       .for_each(|h| release_handle!(self, h));
     self
       .islands
@@ -173,6 +181,12 @@ where
   }
 }
 
+impl AtlasConfig {
+  pub fn new(label: &'static str, max_size: DeviceSize) -> Self {
+    Self { label, min_size: max_size / 4, max_size }
+  }
+}
+
 impl<Attr> AtlasHandle<Attr> {
   pub fn tex_id(&self) -> usize {
     match &self.atlas_dist {
@@ -202,9 +216,14 @@ mod tests {
   #[test]
   fn atlas_grow_to_alloc() {
     let mut gpu_impl = block_on(WgpuImpl::headless());
-    let mut atlas =
-      Atlas::<WgpuTexture, _, _>::new("_", ColorFormat::Alpha8, AntiAliasing::None, &mut gpu_impl);
-    let size = DeviceSize::new(ATLAS_MIN_SIZE.width + 1, 16);
+    let mut atlas = Atlas::<WgpuTexture, _, _>::new(
+      AtlasConfig::new("", DeviceSize::new(4096, 4096)),
+      ColorFormat::Alpha8,
+      AntiAliasing::None,
+      &mut gpu_impl,
+    );
+
+    let size = DeviceSize::new(atlas.config.min_size.width + 1, 16);
     let h = atlas.allocate(1, (), size, &mut gpu_impl);
     gpu_impl.end_frame();
     assert_eq!(h.tex_id(), 0);
@@ -213,8 +232,12 @@ mod tests {
   #[test]
   fn resource_clear() {
     let mut wgpu = block_on(WgpuImpl::headless());
-    let mut atlas =
-      Atlas::<WgpuTexture, _, _>::new("_", ColorFormat::Rgba8, AntiAliasing::None, &mut wgpu);
+    let mut atlas = Atlas::<WgpuTexture, _, _>::new(
+      AtlasConfig::new("", DeviceSize::new(4096, 4096)),
+      ColorFormat::Rgba8,
+      AntiAliasing::None,
+      &mut wgpu,
+    );
     atlas.allocate(1, (), DeviceSize::new(32, 32), &mut wgpu);
     atlas.allocate(2, (), DeviceSize::new(4097, 16), &mut wgpu);
     atlas.end_frame();
@@ -228,8 +251,12 @@ mod tests {
   #[test]
   fn fix_scale_path_cache_miss() {
     let mut wgpu = block_on(WgpuImpl::headless());
-    let mut atlas =
-      Atlas::<WgpuTexture, _, _>::new("_", ColorFormat::Rgba8, AntiAliasing::None, &mut wgpu);
+    let mut atlas = Atlas::<WgpuTexture, _, _>::new(
+      AtlasConfig::new("", DeviceSize::new(4096, 4096)),
+      ColorFormat::Rgba8,
+      AntiAliasing::None,
+      &mut wgpu,
+    );
     atlas.allocate(1, (), DeviceSize::new(32, 32), &mut wgpu);
     atlas.allocate(1, (), DeviceSize::new(512, 512), &mut wgpu); // before the frame end, two allocation for key(1) should keep.
     let mut alloc_count = 0;
@@ -251,8 +278,12 @@ mod tests {
   #[test]
   fn fix_atlas_expand_overlap() {
     let mut wgpu = block_on(WgpuImpl::headless());
-    let mut atlas =
-      Atlas::<WgpuTexture, _, _>::new("_", ColorFormat::Alpha8, AntiAliasing::None, &mut wgpu);
+    let mut atlas = Atlas::<WgpuTexture, _, _>::new(
+      AtlasConfig::new("", DeviceSize::new(4096, 4096)),
+      ColorFormat::Alpha8,
+      AntiAliasing::None,
+      &mut wgpu,
+    );
     let icon = DeviceSize::new(32, 32);
     atlas.allocate(1, (), icon, &mut wgpu);
 
@@ -260,13 +291,14 @@ mod tests {
       .texture
       .write_data(&DeviceRect::from_size(icon), &[1; 32 * 32], &mut wgpu);
 
+    let min_size = atlas.config.min_size;
     // force atlas to expand
-    let h = atlas.allocate(2, (), ATLAS_MIN_SIZE, &mut wgpu);
+    let h = atlas.allocate(2, (), min_size, &mut wgpu);
     let second_rect = h.tex_rect(&atlas);
-    const SECOND_AREA: usize = (ATLAS_MIN_SIZE.width * ATLAS_MIN_SIZE.height) as usize;
+    let second_area: usize = (min_size.width * min_size.height) as usize;
     atlas
       .texture
-      .write_data(&second_rect, &[2; SECOND_AREA], &mut wgpu);
+      .write_data(&second_rect, &vec![2; second_area], &mut wgpu);
     let img = atlas
       .texture
       .copy_as_image(&DeviceRect::from_size(atlas.size()), &mut wgpu);
@@ -281,7 +313,7 @@ mod tests {
         .iter()
         .map(|v| *v as usize)
         .sum::<usize>(),
-      icon.area() as usize + SECOND_AREA * 2
+      icon.area() as usize + second_area * 2
     )
   }
 }
diff --git a/gpu/src/gpu_backend/textures_mgr.rs b/gpu/src/gpu_backend/textures_mgr.rs
index 05437fd82..ec94d37b8 100644
--- a/gpu/src/gpu_backend/textures_mgr.rs
+++ b/gpu/src/gpu_backend/textures_mgr.rs
@@ -13,10 +13,10 @@ use ribir_painter::{
 };
 
 use super::{
-  atlas::{Atlas, AtlasHandle},
+  atlas::{Atlas, AtlasConfig, AtlasHandle},
   Texture,
 };
-use crate::{add_draw_rect_vertices, gpu_backend::atlas::ATLAS_MAX_ITEM, GPUBackendImpl};
+use crate::{add_draw_rect_vertices, GPUBackendImpl};
 const TOLERANCE: f32 = 0.1_f32;
 const PAR_CHUNKS_SIZE: usize = 64;
 
@@ -75,14 +75,30 @@ where
   T::Host: GPUBackendImpl<Texture = T>,
 {
   pub(super) fn new(gpu_impl: &mut T::Host, anti_aliasing: AntiAliasing) -> Self {
+    let max_size = gpu_impl.texture_size_limit();
+
     Self {
-      alpha_atlas: Atlas::new("Alpha atlas", ColorFormat::Alpha8, anti_aliasing, gpu_impl),
-      rgba_atlas: Atlas::new("Rgba atlas", ColorFormat::Rgba8, AntiAliasing::None, gpu_impl),
+      alpha_atlas: Atlas::new(
+        AtlasConfig::new("Alpha atlas", max_size),
+        ColorFormat::Alpha8,
+        anti_aliasing,
+        gpu_impl,
+      ),
+      rgba_atlas: Atlas::new(
+        AtlasConfig::new("Rgba atlas", max_size),
+        ColorFormat::Rgba8,
+        AntiAliasing::None,
+        gpu_impl,
+      ),
       fill_task: <_>::default(),
       fill_task_buffers: <_>::default(),
     }
   }
 
+  pub(super) fn is_good_for_cache(&self, size: DeviceSize) -> bool {
+    self.alpha_atlas.is_good_size_to_alloc(size)
+  }
+
   pub(super) fn set_anti_aliasing(&mut self, anti_aliasing: AntiAliasing, host: &mut T::Host) {
     self
       .alpha_atlas
@@ -358,8 +374,6 @@ where
   TextureSlice { tex_id: TextureID::Rgba(h.tex_id()), rect: h.tex_rect(atlas) }
 }
 
-pub(crate) fn valid_cache_item(size: &DeviceSize) -> bool { size.lower_than(ATLAS_MAX_ITEM).any() }
-
 fn extend_buffer<V>(dist: &mut VertexBuffers<V>, from: VertexBuffers<V>) {
   if dist.vertices.is_empty() {
     dist.vertices.extend(from.vertices);
diff --git a/gpu/src/lib.rs b/gpu/src/lib.rs
index 7a389e52a..80d1d4ce0 100644
--- a/gpu/src/lib.rs
+++ b/gpu/src/lib.rs
@@ -87,6 +87,8 @@ pub trait GPUBackendImpl {
   #[inline]
   fn load_tex_limit_per_draw(&self) -> usize { 8 }
 
+  fn texture_size_limit(&self) -> DeviceSize;
+
   /// Create a texture.
   fn new_texture(
     &mut self, size: DeviceSize, anti_aliasing: AntiAliasing, format: ColorFormat,
diff --git a/gpu/src/wgpu_impl.rs b/gpu/src/wgpu_impl.rs
index c91eae466..25998c525 100644
--- a/gpu/src/wgpu_impl.rs
+++ b/gpu/src/wgpu_impl.rs
@@ -62,6 +62,11 @@ const TEX_PER_DRAW: usize = 8;
 impl GPUBackendImpl for WgpuImpl {
   type Texture = WgpuTexture;
 
+  fn texture_size_limit(&self) -> DeviceSize {
+    let limits = self.device.limits();
+    DeviceSize::new(limits.max_texture_dimension_2d as i32, limits.max_texture_dimension_2d as i32)
+  }
+
   fn load_tex_limit_per_draw(&self) -> usize { TEX_PER_DRAW }
 
   fn begin_frame(&mut self) {
@@ -538,7 +543,7 @@ impl WgpuImpl {
         force_fallback_adapter: false,
       })
       .await
-      .unwrap();
+      .expect("No suitable GPU adapters found on the system!");
 
     let (device, queue) = adapter
       .request_device(
@@ -546,7 +551,7 @@ impl WgpuImpl {
         None,
       )
       .await
-      .unwrap();
+      .expect("Unable to find a suitable GPU adapter!");
 
     let sampler = device.create_sampler(&wgpu::SamplerDescriptor {
       address_mode_u: wgpu::AddressMode::ClampToEdge,

From 7d161470ad5bb69b5b3dd342e22139b1c229342a Mon Sep 17 00:00:00 2001
From: Adoo <Adoo@outlook.com>
Date: Mon, 13 May 2024 18:45:50 +0800
Subject: [PATCH 2/6] =?UTF-8?q?refactor(gpu):=20=F0=9F=92=A1=20keep=20prim?=
 =?UTF-8?q?itives=20aligned=20by=2016=20bytes?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 gpu/src/gpu_backend.rs                        | 16 ++---
 gpu/src/lib.rs                                | 43 +++++-------
 gpu/src/wgpu_impl/shaders/img_triangles.wgsl  | 32 ++++-----
 .../shaders/linear_gradient_triangles.wgsl    | 65 +++++++++++++------
 .../shaders/radial_gradient_triangles.wgsl    | 50 ++++++++++----
 5 files changed, 118 insertions(+), 88 deletions(-)

diff --git a/gpu/src/gpu_backend.rs b/gpu/src/gpu_backend.rs
index 9aadcb847..aac4595fd 100644
--- a/gpu/src/gpu_backend.rs
+++ b/gpu/src/gpu_backend.rs
@@ -194,16 +194,15 @@ where
           let img_slice = self.tex_mgr.store_image(&img, &mut self.gpu_impl);
           let img_start = img_slice.rect.origin.to_f32().to_array();
           let img_size = img_slice.rect.size.to_f32().to_array();
-          let img_tex_idx = self.tex_ids_map.tex_idx(img_slice.tex_id);
+          let mask_head_and_tex_idx =
+            (mask_head as i32) << 16 | self.tex_ids_map.tex_idx(img_slice.tex_id) as i32;
           let prim_idx = self.img_prims.len() as u32;
           let prim = ImgPrimitive {
             transform: ts.inverse().unwrap().to_array(),
             img_start,
             img_size,
-            img_tex_idx,
-            mask_head,
+            mask_head_and_tex_idx,
             opacity,
-            _dummy: 0,
           };
           self.img_prims.push(prim);
           let buffer = &mut self.img_vertices_buffer;
@@ -242,14 +241,15 @@ where
       PaintCommand::LinearGradient { path, linear_gradient } => {
         let ts = path.transform;
         if let Some((rect, mask_head)) = self.new_mask_layer(path) {
+          let stop = (self.linear_gradient_stops.len() << 16 | linear_gradient.stops.len()) as u32;
+          let mask_head_and_spread =
+            (mask_head as i32) << 16 | linear_gradient.spread_method as i32;
           let prim: LinearGradientPrimitive = LinearGradientPrimitive {
             transform: ts.inverse().unwrap().to_array(),
-            stop_start: self.linear_gradient_stops.len() as u32,
-            stop_cnt: linear_gradient.stops.len() as u32,
+            stop,
             start_position: linear_gradient.start.to_array(),
             end_position: linear_gradient.end.to_array(),
-            mask_head,
-            spread: linear_gradient.spread_method as u32,
+            mask_head_and_spread,
           };
           self.linear_gradient_stops.extend(
             linear_gradient
diff --git a/gpu/src/lib.rs b/gpu/src/lib.rs
index 80d1d4ce0..2738b0102 100644
--- a/gpu/src/lib.rs
+++ b/gpu/src/lib.rs
@@ -191,23 +191,13 @@ pub struct LinearGradientPrimIndex(u32);
 #[repr(packed)]
 #[derive(AsBytes, PartialEq, Clone, Copy, Debug)]
 pub struct GradientStopPrimitive {
-  pub red: f32,
-  pub green: f32,
-  pub blue: f32,
-  pub alpha: f32,
+  pub color: u32,
   pub offset: f32,
 }
 
 impl From<GradientStop> for GradientStopPrimitive {
   fn from(stop: GradientStop) -> Self {
-    let color = stop.color.into_f32_components();
-    GradientStopPrimitive {
-      red: color[0],
-      green: color[1],
-      blue: color[2],
-      alpha: color[3],
-      offset: stop.offset,
-    }
+    GradientStopPrimitive { color: stop.color.into_u32(), offset: stop.offset }
   }
 }
 
@@ -242,19 +232,19 @@ pub struct LinearGradientPrimitive {
   /// A 2x3 column-major matrix, transform a vertex position to the texture
   /// position
   pub transform: [f32; 6],
-  /// The color stop's start index
-  pub stop_start: u32,
-  /// The size of the color stop
-  pub stop_cnt: u32,
   /// position of the start center
   pub start_position: [f32; 2],
   /// position of the end center
   pub end_position: [f32; 2],
-  /// The index of the head mask layer.
-  pub mask_head: i32,
-  /// the spread method of the gradient. 0 for pad, 1 for reflect and 2
-  /// for repeat
-  pub spread: u32,
+  /// The color stop information, there are two parts:
+  /// - The high 16-bit index represents the start index of the color stop.
+  /// - The low 16-bit index represents the size of the color stop.
+  pub stop: u32,
+  /// A mix of two 16-bit values:
+  /// - The high 16-bit index represents the head mask layer.
+  /// - The low 16-bit represents the spread method of the gradient. 0 for pad,
+  ///   1 for reflect and 2 for repeat
+  pub mask_head_and_spread: i32,
 }
 
 #[repr(packed)]
@@ -267,15 +257,12 @@ pub struct ImgPrimitive {
   pub img_start: [f32; 2],
   /// The size of the image image.
   pub img_size: [f32; 2],
-  /// The index of texture, `load_textures` method provide all textures
-  /// a draw phase need.
-  pub img_tex_idx: u32,
-  /// The index of the head mask layer.
-  pub mask_head: i32,
+  /// This represents a mix of two 16-bit indices:
+  /// - The high 16-bit index represents the head mask layer. It is an i16.
+  /// - The low 16-bit index represents the texture. It is a u16.
+  pub mask_head_and_tex_idx: i32,
   /// extra alpha apply to current vertex
   pub opacity: f32,
-  /// keep align to 8 bytes.
-  _dummy: u32,
 }
 
 /// The mask layer describes an alpha channel layer that is used in the fragment
diff --git a/gpu/src/wgpu_impl/shaders/img_triangles.wgsl b/gpu/src/wgpu_impl/shaders/img_triangles.wgsl
index 5ebf6482d..463665e52 100644
--- a/gpu/src/wgpu_impl/shaders/img_triangles.wgsl
+++ b/gpu/src/wgpu_impl/shaders/img_triangles.wgsl
@@ -10,15 +10,12 @@ struct ImgPrimitive {
   img_start: vec2<f32>,
   /// The size of the image image.
   img_size: vec2<f32>,
-  /// The index of texture, `load_color_primitives` method provide all textures
-  /// a draw phase need.
-  img_tex_idx: u32,
-  /// The index of the head mask layer.
-  mask_head: i32,
+  /// This is a mix field,
+  /// - the high 16 bits is the index of head mask layer, as a i16 type.
+  /// - the low 16 bits is the index of texture, as a u16 type.
+  mask_head_and_tex_idx: i32,
   /// extra alpha apply to current vertex
   opacity: f32,
-  /// keep align to 8 bytes.
-  dummy: u32,
 }
 
 struct VertexOutput {
@@ -79,43 +76,43 @@ fn fs_main(f: VertexOutput) -> @location(0) vec4<f32> {
     var color: vec4<f32>;
     let pos = prim.transform * f.pos.xyz;
     var img_pos = pos.xy % prim.img_size + prim.img_start;
-    switch prim.img_tex_idx {
-        case 0u: {
+    switch abs(prim.mask_head_and_tex_idx & 0x0000FFFF) {
+        case 0: {
             let img_tex_size = textureDimensions(tex_0);
             img_pos = img_pos / vec2<f32>(f32(img_tex_size.x), f32(img_tex_size.y));
             color = textureSample(tex_0, s_sampler, img_pos);
         }
-        case 1u: {
+        case 1: {
             let img_tex_size = textureDimensions(tex_1);
             img_pos = img_pos / vec2<f32>(f32(img_tex_size.x), f32(img_tex_size.y));
             color = textureSample(tex_1, s_sampler, img_pos);
         }
-        case 2u: {
+        case 2: {
             let img_tex_size = textureDimensions(tex_2);
             img_pos = img_pos / vec2<f32>(f32(img_tex_size.x), f32(img_tex_size.y));
             color = textureSample(tex_2, s_sampler, img_pos);
         }
-        case 3u: {
+        case 3: {
             let img_tex_size = textureDimensions(tex_3);
             img_pos = img_pos / vec2<f32>(f32(img_tex_size.x), f32(img_tex_size.y));
             color = textureSample(tex_3, s_sampler, img_pos);
         }
-        case 4u: {
+        case 4: {
             let img_tex_size = textureDimensions(tex_4);
             img_pos = img_pos / vec2<f32>(f32(img_tex_size.x), f32(img_tex_size.y));
             color = textureSample(tex_4, s_sampler, img_pos);
         }
-        case 5u: {
+        case 5: {
             let img_tex_size = textureDimensions(tex_5);
             img_pos = img_pos / vec2<f32>(f32(img_tex_size.x), f32(img_tex_size.y));
             color = textureSample(tex_5, s_sampler, img_pos);
         }
-        case 6u: {
+        case 6: {
             let img_tex_size = textureDimensions(tex_6);
             img_pos = img_pos / vec2<f32>(f32(img_tex_size.x), f32(img_tex_size.y));
             color = textureSample(tex_6, s_sampler, img_pos);
         }
-        case 7u: {
+        case 7: {
             let img_tex_size = textureDimensions(tex_7);
             img_pos = img_pos / vec2<f32>(f32(img_tex_size.x), f32(img_tex_size.y));
             color = textureSample(tex_7, s_sampler, img_pos);
@@ -123,8 +120,7 @@ fn fs_main(f: VertexOutput) -> @location(0) vec4<f32> {
         default: { color = vec4<f32>(1., 0., 0., 1.); }
       };
 
-
-    var mask_idx = prim.mask_head;
+    var mask_idx = prim.mask_head_and_tex_idx >> 16 ;
     loop {
         if mask_idx < 0 {
             break;
diff --git a/gpu/src/wgpu_impl/shaders/linear_gradient_triangles.wgsl b/gpu/src/wgpu_impl/shaders/linear_gradient_triangles.wgsl
index 706154e7b..0f5db25d1 100644
--- a/gpu/src/wgpu_impl/shaders/linear_gradient_triangles.wgsl
+++ b/gpu/src/wgpu_impl/shaders/linear_gradient_triangles.wgsl
@@ -27,29 +27,34 @@ struct MaskLayer {
   prev_mask_idx: i32,
 }
 
+// A pair of stops. This arrangement aligns the stops with 16 bytes, minimizing excessive padding.
+struct StopPair {
+    color1: u32,
+    offset1: f32,
+    color2: u32,
+    offset2: f32,
+}
+
 struct Stop {
-    red: f32,
-    green: f32,
-    blue: f32,
-    alpha: f32,
+    color: vec4<f32>,
     offset: f32,
 }
 
 struct Primitive {
   transform: mat3x2<f32>,
-  stop_start: i32,
-  stop_cnt: i32,
   start_position: vec2<f32>,
   end_position: vec2<f32>,
-  mask_head: i32,
-  spread: u32, // 0 for pad, 1 for reflect, 2 for repeat
+  // A value mixed stop_start(u16) and stop_cnt(u16)
+  stop: u32,
+  // A value mixed mask_head(i16) and spread(u16)
+  mask_head_and_spread: i32
 }
 
 @group(0) @binding(0) 
 var<storage> mask_layers: array<MaskLayer>;
 
 @group(1) @binding(0)
-var<storage> stops: array<Stop>;
+var<storage> stops: array<StopPair>;
 
 @group(2) @binding(0)
 var<storage> prims: array<Primitive>;
@@ -83,12 +88,31 @@ fn calc_offset(x: f32, y: f32, x_0: f32, y_0: f32, x_1: f32, y_1: f32) -> f32 {
     return (dx_0 * dx_1_0 + dy_0 * dy_1_0) / (dx_1_0 * dx_1_0 + dy_1_0 * dy_1_0);
 }
 
+fn unpackUnorm4x8(packed: u32) -> vec4<f32> {
+    return vec4<f32>(
+        f32((packed & 0xff000000) >> 24) / 255.0,
+        f32((packed & 0x00ff0000) >> 16) / 255.0,
+        f32((packed & 0x0000ff00) >> 8) / 255.0,
+        f32((packed & 0x000000ff) >> 0) / 255.0
+    );
+}
+
+fn get_stop(idx: u32)  -> Stop {
+    let pair = stops[idx / 2];
+    if idx % 2 == 0 {
+        return Stop(unpackUnorm4x8(pair.color1), pair.offset1);
+    } else {
+        return Stop(unpackUnorm4x8(pair.color2), pair.offset2);
+    }
+}
+
 @fragment
 fn fs_main(input: FragInput) -> @location(0) vec4<f32> {
     let prim = prims[input.prim_idx];
     let pos = prim.transform * vec3(input.pos.xy, 1.);
     var alpha = 1.;
-    var mask_idx = prim.mask_head;
+    var mask_idx = prim.mask_head_and_spread >> 16;
+
     loop {
         if mask_idx < 0 {
             break;
@@ -155,11 +179,12 @@ fn fs_main(input: FragInput) -> @location(0) vec4<f32> {
         return vec4<f32>(1., 1., 1., alpha);
     }
     var offset = calc_offset(pos.x, pos.y, prim.start_position.x, prim.start_position.y, prim.end_position.x, prim.end_position.y);
-
-    if prim.spread == 0u {
+    
+    let spread = abs(prim.mask_head_and_spread & 0x0000ffff);
+    if spread == 0 {
         // pad
         offset = min(1., max(0., offset));
-    } else if prim.spread == 1u {
+    } else if spread == 1 {
         //reflect
         offset = 1. - abs(fract(offset / 2.) - 0.5) * 2.;
     } else {
@@ -167,17 +192,17 @@ fn fs_main(input: FragInput) -> @location(0) vec4<f32> {
         offset = fract(offset);
     }
 
-    var prev = stops[prim.stop_start];
-    var next = stops[prim.stop_start + 1];
-    for (var i = 2; i < prim.stop_cnt && next.offset < offset; i++) {
+    let stop_start = prim.stop >> 16;
+    let stop_cnt = prim.stop & 0x0000ffff;
+    var prev = get_stop(stop_start);
+    var next = get_stop(stop_start + 1);
+    for (var i = 2u; i < stop_cnt && next.offset < offset; i++) {
         prev = next;
-        next = stops[prim.stop_start + i];
+        next = get_stop(stop_start + i);
     }
 
     offset = max(prev.offset, min(next.offset, offset));
     let weight1 = (next.offset - offset) / (next.offset - prev.offset);
     let weight2 = 1. - weight1;
-    let prev_color = vec4<f32>(prev.red, prev.green, prev.blue, prev.alpha);
-    let next_color = vec4<f32>(next.red, next.green, next.blue, next.alpha);
-    return (prev_color * weight1 + next_color * weight2) * vec4<f32>(1., 1., 1., alpha);
+    return (prev.color * weight1 + next.color * weight2) * vec4<f32>(1., 1., 1., alpha);
 }
diff --git a/gpu/src/wgpu_impl/shaders/radial_gradient_triangles.wgsl b/gpu/src/wgpu_impl/shaders/radial_gradient_triangles.wgsl
index b2c7aa770..411db8601 100644
--- a/gpu/src/wgpu_impl/shaders/radial_gradient_triangles.wgsl
+++ b/gpu/src/wgpu_impl/shaders/radial_gradient_triangles.wgsl
@@ -27,18 +27,23 @@ struct MaskLayer {
   prev_mask_idx: i32,
 }
 
+// A pair of stops. This arrangement aligns the stops with 16 bytes, minimizing excessive padding.
+struct StopPair {
+    color1: u32,
+    offset1: f32,
+    color2: u32,
+    offset2: f32,
+}
+
 struct Stop {
-    red: f32,
-    green: f32,
-    blue: f32,
-    alpha: f32,
+    color: vec4<f32>,
     offset: f32,
 }
 
 struct Primitive {
   transform: mat3x2<f32>,
-  stop_start: i32,
-  stop_cnt: i32,
+  stop_start: u32,
+  stop_cnt: u32,
   start_center: vec2<f32>,
   end_center: vec2<f32>,
   start_radius: f32,
@@ -51,7 +56,7 @@ struct Primitive {
 var<storage> mask_layers: array<MaskLayer>;
 
 @group(1) @binding(0)
-var<storage> stops: array<Stop>;
+var<storage> stops: array<StopPair>;
 
 @group(2) @binding(0)
 var<storage> prims: array<Primitive>;
@@ -75,6 +80,25 @@ var tex_6: texture_2d<f32>;
 @group(3) @binding(8)
 var tex_7: texture_2d<f32>;
 
+fn unpackUnorm4x8(packed: u32) -> vec4<f32> {
+    return vec4<f32>(
+        f32((packed & 0xff000000) >> 24) / 255.0,
+        f32((packed & 0x00ff0000) >> 16) / 255.0,
+        f32((packed & 0x0000ff00) >> 8) / 255.0,
+        f32((packed & 0x000000ff) >> 0) / 255.0
+    );
+}
+
+
+fn get_stop(idx: u32)  -> Stop {
+    let pair = stops[idx / 2];
+    if idx % 2 == 0 {
+        return Stop(unpackUnorm4x8(pair.color1), pair.offset1);
+    } else {
+        return Stop(unpackUnorm4x8(pair.color2), pair.offset2);
+    }
+}
+
 @fragment
 fn fs_main(input: FragInput) -> @location(0) vec4<f32> {
     let prim = prims[input.prim_idx];
@@ -161,19 +185,17 @@ fn fs_main(input: FragInput) -> @location(0) vec4<f32> {
         offset = fract(offset);
     }
 
-    var prev = stops[prim.stop_start];
-    var next = stops[prim.stop_start + 1];
-    for (var i = 2; i < prim.stop_cnt && next.offset < offset; i++) {
+    var prev = get_stop(prim.stop_start);
+    var next = get_stop(prim.stop_start + 1);
+    for (var i = 2u; i < prim.stop_cnt && next.offset < offset; i++) {
         prev = next;
-        next = stops[prim.stop_start + i];
+        next = get_stop(prim.stop_start + i);
     }
 
     offset = max(prev.offset, min(next.offset, offset));
     let weight1 = (next.offset - offset) / (next.offset - prev.offset);
     let weight2 = 1. - weight1;
-    let prev_color = vec4<f32>(prev.red, prev.green, prev.blue, prev.alpha);
-    let next_color = vec4<f32>(next.red, next.green, next.blue, next.alpha);
-    return (prev_color * weight1 + next_color * weight2) * vec4<f32>(1., 1., 1., alpha);
+    return (prev.color * weight1 + next.color * weight2) * vec4<f32>(1., 1., 1., alpha);
 }
 // input the center and radius of the circles, return the tag of resolvable (1. mean resolvable and -1. unresolvable) and the offset if tag is resolvable.
 fn calc_offset(x: f32, y: f32, x_0: f32, y_0: f32, r_0: f32, x_1: f32, y_1: f32, r_1: f32) -> vec2<f32> {

From 007dff44cb93cdb672906c59e365396b0fe32529 Mon Sep 17 00:00:00 2001
From: Adoo <Adoo@outlook.com>
Date: Tue, 14 May 2024 22:17:43 +0800
Subject: [PATCH 3/6] =?UTF-8?q?feat(gpu):=20=F0=9F=8E=B8=20the=20`wgpu`=20?=
 =?UTF-8?q?implementation=20is=20compatible=20with=20WebGL?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 CHANGELOG.md                                  |  3 +-
 Cargo.toml                                    |  4 +-
 gpu/src/gpu_backend.rs                        | 50 ++++++----
 gpu/src/gpu_backend/atlas.rs                  |  7 +-
 gpu/src/gpu_backend/textures_mgr.rs           |  2 +-
 gpu/src/lib.rs                                | 30 ++++--
 gpu/src/wgpu_impl.rs                          | 96 ++++++++++++-------
 .../wgpu_impl/draw_alpha_triangles_pass.rs    |  6 +-
 .../wgpu_impl/draw_color_triangles_pass.rs    | 11 +--
 gpu/src/wgpu_impl/draw_img_triangles_pass.rs  | 33 +++----
 .../wgpu_impl/draw_linear_gradient_pass.rs    | 51 +++++-----
 .../wgpu_impl/draw_radial_gradient_pass.rs    | 48 ++++------
 gpu/src/wgpu_impl/draw_texture_pass.rs        |  7 +-
 .../wgpu_impl/shaders/color_triangles.wgsl    | 20 +++-
 gpu/src/wgpu_impl/shaders/img_triangles.wgsl  | 45 ++++++---
 .../shaders/linear_gradient_triangles.wgsl    | 32 +++++--
 .../shaders/radial_gradient_triangles.wgsl    | 32 +++++--
 gpu/src/wgpu_impl/{storage.rs => uniform.rs}  | 53 ++++------
 18 files changed, 313 insertions(+), 217 deletions(-)
 rename gpu/src/wgpu_impl/{storage.rs => uniform.rs} (51%)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 5414cbf6c..723aeadb3 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -30,10 +30,11 @@ Please only add new entries below the [Unreleased](#unreleased---releasedate) he
 - **core**: The split functions in `StateReader::map_reader`, `StateWriter::map_writer`, and `StateWriter::split_writer` no longer need to return a reference. (#568 @M-Adoo)
 - **core**: Introduced `StateWatcher` for watching state modifies, which was previously the responsibility of `StateReader`. This results in a cleaner and more compact `StateReader` implementation. (#556, @M-Adoo)
 - **gpu**: Introduced `GPUBackendImpl::max_textures_per_draw` to set a limit on textures per draw phase (#562 @M-Adoo)
+- **gpu**: Updated the `wgpu` implementation of the GPU backend to support WebGL. (#578, @M-Adoo)
 
 ### Fixed
 
-- **gpu**: Retrieve the texture limit size from the GPU instead of using a hardcoded value. (#pr, @M-Adoo)
+- **gpu**: Retrieve the texture limit size from the GPU instead of using a hardcoded value. (#578, @M-Adoo)
 
 ### Changed
 
diff --git a/Cargo.toml b/Cargo.toml
index 5ff8b3d42..49a33dc1a 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -22,7 +22,9 @@ resolver = "2"
 debug = true
 
 [profile.release]
-debug = true
+lto = true
+strip = true  
+codegen-units = 1
 
 [workspace.package]
 authors = ["RibirX<Adoo@outlook.com>"]
diff --git a/gpu/src/gpu_backend.rs b/gpu/src/gpu_backend.rs
index aac4595fd..0812bdd56 100644
--- a/gpu/src/gpu_backend.rs
+++ b/gpu/src/gpu_backend.rs
@@ -107,15 +107,13 @@ where
     self.begin_draw_phase();
     let output_size = output.size();
     for cmd in commands.into_iter() {
+      let max_tex_per_draw = self.gpu_impl.limits().max_tex_load;
       let maybe_used = match cmd {
         PaintCommand::ImgPath { .. } => 2,
         PaintCommand::PopClip => 0,
         _ => 1,
       };
-      if self.tex_ids_map.all_textures().len() + maybe_used
-        >= self.gpu_impl.load_tex_limit_per_draw()
-        || !self.continues_cmd(&cmd)
-      {
+      if !self.can_batch(&cmd) {
         // if the next command may hit the texture limit, submit the current draw phase.
         // And start a new draw phase.
         self.draw_triangles(output);
@@ -123,8 +121,7 @@ where
         self.begin_draw_phase();
 
         assert!(
-          self.tex_ids_map.all_textures().len() + maybe_used
-            < self.gpu_impl.load_tex_limit_per_draw(),
+          self.tex_ids_map.all_textures().len() + maybe_used < max_tex_per_draw,
           "The GPUBackend implementation does not provide a sufficient texture limit per draw."
         )
       }
@@ -195,7 +192,7 @@ where
           let img_start = img_slice.rect.origin.to_f32().to_array();
           let img_size = img_slice.rect.size.to_f32().to_array();
           let mask_head_and_tex_idx =
-            (mask_head as i32) << 16 | self.tex_ids_map.tex_idx(img_slice.tex_id) as i32;
+            mask_head << 16 | self.tex_ids_map.tex_idx(img_slice.tex_id) as i32;
           let prim_idx = self.img_prims.len() as u32;
           let prim = ImgPrimitive {
             transform: ts.inverse().unwrap().to_array(),
@@ -242,8 +239,7 @@ where
         let ts = path.transform;
         if let Some((rect, mask_head)) = self.new_mask_layer(path) {
           let stop = (self.linear_gradient_stops.len() << 16 | linear_gradient.stops.len()) as u32;
-          let mask_head_and_spread =
-            (mask_head as i32) << 16 | linear_gradient.spread_method as i32;
+          let mask_head_and_spread = mask_head << 16 | linear_gradient.spread_method as i32;
           let prim: LinearGradientPrimitive = LinearGradientPrimitive {
             transform: ts.inverse().unwrap().to_array(),
             stop,
@@ -343,17 +339,29 @@ where
     self.linear_gradient_stops.clear();
   }
 
-  fn continues_cmd(&self, cmd: &PaintCommand) -> bool {
-    matches!(
-      (self.current_phase, cmd),
-      (CurrentPhase::None, _)
-        | (_, PaintCommand::Clip(_))
-        | (_, PaintCommand::PopClip)
-        | (CurrentPhase::Color, PaintCommand::ColorPath { .. })
-        | (CurrentPhase::Img, PaintCommand::ImgPath { .. })
-        | (CurrentPhase::RadialGradient, PaintCommand::RadialGradient { .. })
-        | (CurrentPhase::LinearGradient, PaintCommand::LinearGradient { .. })
-    )
+  fn can_batch(&self, cmd: &PaintCommand) -> bool {
+    let limits = self.gpu_impl.limits();
+    let tex_used = self.tex_ids_map.all_textures().len();
+    match (self.current_phase, cmd) {
+      (CurrentPhase::None, _) | (_, PaintCommand::PopClip) => true,
+      (_, PaintCommand::Clip(_)) | (CurrentPhase::Color, PaintCommand::ColorPath { .. }) => {
+        tex_used < limits.max_tex_load
+      }
+      (CurrentPhase::Img, PaintCommand::ImgPath { .. }) => {
+        tex_used < limits.max_tex_load - 1 && self.img_prims.len() < limits.max_image_primitives
+      }
+      (CurrentPhase::RadialGradient, PaintCommand::RadialGradient { .. }) => {
+        tex_used < limits.max_tex_load
+          && self.radial_gradient_prims.len() < limits.max_radial_gradient_primitives
+          && self.radial_gradient_stops.len() < limits.max_gradient_stop_primitives
+      }
+      (CurrentPhase::LinearGradient, PaintCommand::LinearGradient { .. }) => {
+        tex_used < limits.max_tex_load
+          && self.linear_gradient_prims.len() < limits.max_linear_gradient_primitives
+          && self.linear_gradient_stops.len() < limits.max_gradient_stop_primitives
+      }
+      _ => false,
+    }
   }
 
   fn current_clip_mask_index(&self) -> i32 {
@@ -412,7 +420,7 @@ where
     gpu_impl.load_mask_layers(&self.mask_layers);
 
     let textures = self.tex_ids_map.all_textures();
-    let max_textures = gpu_impl.load_tex_limit_per_draw();
+    let max_textures = gpu_impl.limits().max_tex_load;
     let mut tex_buffer = Vec::with_capacity(max_textures);
     textures.iter().take(max_textures).for_each(|id| {
       tex_buffer.push(self.tex_mgr.texture(*id));
diff --git a/gpu/src/gpu_backend/atlas.rs b/gpu/src/gpu_backend/atlas.rs
index d9d7051fe..88183d631 100644
--- a/gpu/src/gpu_backend/atlas.rs
+++ b/gpu/src/gpu_backend/atlas.rs
@@ -172,7 +172,7 @@ where
   pub(crate) fn end_frame(&mut self) {
     self
       .cache
-      .end_frame(&self.config.label)
+      .end_frame(self.config.label)
       .for_each(|h| release_handle!(self, h));
     self
       .islands
@@ -232,14 +232,15 @@ mod tests {
   #[test]
   fn resource_clear() {
     let mut wgpu = block_on(WgpuImpl::headless());
+    let size = wgpu.limits().texture_size;
     let mut atlas = Atlas::<WgpuTexture, _, _>::new(
-      AtlasConfig::new("", DeviceSize::new(4096, 4096)),
+      AtlasConfig::new("", size),
       ColorFormat::Rgba8,
       AntiAliasing::None,
       &mut wgpu,
     );
     atlas.allocate(1, (), DeviceSize::new(32, 32), &mut wgpu);
-    atlas.allocate(2, (), DeviceSize::new(4097, 16), &mut wgpu);
+    atlas.allocate(2, (), size, &mut wgpu);
     atlas.end_frame();
     atlas.end_frame();
     wgpu.end_frame();
diff --git a/gpu/src/gpu_backend/textures_mgr.rs b/gpu/src/gpu_backend/textures_mgr.rs
index ec94d37b8..b2559d7d6 100644
--- a/gpu/src/gpu_backend/textures_mgr.rs
+++ b/gpu/src/gpu_backend/textures_mgr.rs
@@ -75,7 +75,7 @@ where
   T::Host: GPUBackendImpl<Texture = T>,
 {
   pub(super) fn new(gpu_impl: &mut T::Host, anti_aliasing: AntiAliasing) -> Self {
-    let max_size = gpu_impl.texture_size_limit();
+    let max_size = gpu_impl.limits().texture_size;
 
     Self {
       alpha_atlas: Atlas::new(
diff --git a/gpu/src/lib.rs b/gpu/src/lib.rs
index 2738b0102..16f4a7bf7 100644
--- a/gpu/src/lib.rs
+++ b/gpu/src/lib.rs
@@ -82,12 +82,8 @@ pub trait GPUBackendImpl {
   /// A frame start, call once per frame
   fn begin_frame(&mut self);
 
-  /// Returns the maximum number of textures that the backend can load in a
-  /// single draw phase.
-  #[inline]
-  fn load_tex_limit_per_draw(&self) -> usize { 8 }
-
-  fn texture_size_limit(&self) -> DeviceSize;
+  /// Returns the limits of the GPU backend.
+  fn limits(&self) -> &DrawPhaseLimits;
 
   /// Create a texture.
   fn new_texture(
@@ -167,6 +163,28 @@ pub trait GPUBackendImpl {
   fn end_frame(&mut self);
 }
 
+/// Represents the set of limits a GPU backend can provide in a single draw.
+pub struct DrawPhaseLimits {
+  /// The maximum size of the texture that the backend can create.
+  pub texture_size: DeviceSize,
+  /// The maximum number of textures that the backend can load in a single draw
+  pub max_tex_load: usize,
+  /// The maximum number of image primitives that the backend can load in a
+  /// single draw phase
+  pub max_image_primitives: usize,
+  /// The maximum number of radial gradient primitives that the backend can load
+  /// in a single draw
+  pub max_radial_gradient_primitives: usize,
+  /// The maximum number of linear gradient primitives that the backend can load
+  /// in a single draw
+  pub max_linear_gradient_primitives: usize,
+  /// The maximum number of gradient stops that the backend can load in a single
+  /// draw phase
+  pub max_gradient_stop_primitives: usize,
+  /// The maximum number of mask layers the backend can load in one draw phase
+  pub max_mask_layers: usize,
+}
+
 #[repr(packed)]
 #[derive(AsBytes, PartialEq, Clone, Copy)]
 pub struct ColorAttr {
diff --git a/gpu/src/wgpu_impl.rs b/gpu/src/wgpu_impl.rs
index 25998c525..0d2ccab35 100644
--- a/gpu/src/wgpu_impl.rs
+++ b/gpu/src/wgpu_impl.rs
@@ -1,4 +1,8 @@
-use std::{error::Error, mem::MaybeUninit, ops::Range};
+use std::{
+  error::Error,
+  mem::{size_of, MaybeUninit},
+  ops::Range,
+};
 
 use futures::channel::oneshot;
 use ribir_geom::{DevicePoint, DeviceRect, DeviceSize};
@@ -9,15 +13,15 @@ use self::{
   draw_color_triangles_pass::DrawColorTrianglesPass, draw_img_triangles_pass::DrawImgTrianglesPass,
   draw_linear_gradient_pass::DrawLinearGradientTrianglesPass,
   draw_radial_gradient_pass::DrawRadialGradientTrianglesPass, draw_texture_pass::DrawTexturePass,
-  storage::Storage,
+  uniform::Uniform,
 };
 use crate::{
-  gpu_backend::Texture, ColorAttr, GPUBackendImpl, GradientStopPrimitive, ImagePrimIndex,
-  ImgPrimitive, LinearGradientPrimIndex, LinearGradientPrimitive, MaskLayer,
+  gpu_backend::Texture, ColorAttr, DrawPhaseLimits, GPUBackendImpl, GradientStopPrimitive,
+  ImagePrimIndex, ImgPrimitive, LinearGradientPrimIndex, LinearGradientPrimitive, MaskLayer,
   RadialGradientPrimIndex, RadialGradientPrimitive,
 };
 mod buffer_pool;
-mod storage;
+mod uniform;
 mod vertex_buffer;
 
 mod draw_alpha_triangles_pass;
@@ -27,6 +31,13 @@ mod draw_linear_gradient_pass;
 mod draw_radial_gradient_pass;
 mod draw_texture_pass;
 
+pub const TEX_PER_DRAW: usize = 8;
+const MAX_IMG_PRIMS: usize = 1024;
+const MAX_RADIAL_GRADIENT_PRIMS: usize = 512;
+const MAX_LINEAR_GRADIENT_PRIMS: usize = 512;
+const MAX_GRADIENT_STOP_PRIMS: usize = 512;
+const MAX_MASK_LAYERS: usize = (64 << 10) / size_of::<MaskLayer>(); // around 64KB
+
 pub struct WgpuImpl {
   device: wgpu::Device,
   queue: wgpu::Queue,
@@ -43,7 +54,8 @@ pub struct WgpuImpl {
   linear_gradient_pass: DrawLinearGradientTrianglesPass,
   texs_layout: wgpu::BindGroupLayout,
   textures_bind: Option<wgpu::BindGroup>,
-  mask_layers_storage: Storage<MaskLayer>,
+  mask_layers_uniform: Uniform<MaskLayer>,
+  limits: DrawPhaseLimits,
 }
 
 macro_rules! command_encoder {
@@ -57,17 +69,10 @@ macro_rules! command_encoder {
 }
 pub(crate) use command_encoder;
 
-const TEX_PER_DRAW: usize = 8;
-
 impl GPUBackendImpl for WgpuImpl {
   type Texture = WgpuTexture;
 
-  fn texture_size_limit(&self) -> DeviceSize {
-    let limits = self.device.limits();
-    DeviceSize::new(limits.max_texture_dimension_2d as i32, limits.max_texture_dimension_2d as i32)
-  }
-
-  fn load_tex_limit_per_draw(&self) -> usize { TEX_PER_DRAW }
+  fn limits(&self) -> &DrawPhaseLimits { &self.limits }
 
   fn begin_frame(&mut self) {
     if self.command_encoder.is_none() {
@@ -134,19 +139,19 @@ impl GPUBackendImpl for WgpuImpl {
   fn load_img_primitives(&mut self, primitives: &[ImgPrimitive]) {
     self
       .img_triangles_pass
-      .load_img_primitives(&self.device, &self.queue, primitives);
+      .load_img_primitives(&self.queue, primitives);
   }
 
   fn load_radial_gradient_primitives(&mut self, primitives: &[RadialGradientPrimitive]) {
     self
       .radial_gradient_pass
-      .load_radial_gradient_primitives(&self.device, &self.queue, primitives);
+      .load_radial_gradient_primitives(&self.queue, primitives);
   }
 
   fn load_radial_gradient_stops(&mut self, stops: &[GradientStopPrimitive]) {
     self
       .radial_gradient_pass
-      .load_gradient_stops(&self.device, &self.queue, stops);
+      .load_gradient_stops(&self.queue, stops);
   }
 
   fn load_radial_gradient_vertices(&mut self, buffers: &VertexBuffers<RadialGradientPrimIndex>) {
@@ -158,13 +163,13 @@ impl GPUBackendImpl for WgpuImpl {
   fn load_linear_gradient_primitives(&mut self, primitives: &[LinearGradientPrimitive]) {
     self
       .linear_gradient_pass
-      .load_linear_gradient_primitives(&self.device, &self.queue, primitives);
+      .load_linear_gradient_primitives(&self.queue, primitives);
   }
 
   fn load_linear_gradient_stops(&mut self, stops: &[GradientStopPrimitive]) {
     self
       .linear_gradient_pass
-      .load_gradient_stops(&self.device, &self.queue, stops);
+      .load_gradient_stops(&self.queue, stops);
   }
 
   fn load_linear_gradient_vertices(&mut self, buffers: &VertexBuffers<LinearGradientPrimIndex>) {
@@ -175,8 +180,8 @@ impl GPUBackendImpl for WgpuImpl {
 
   fn load_mask_layers(&mut self, layers: &[crate::MaskLayer]) {
     self
-      .mask_layers_storage
-      .write_buffer(&self.device, &self.queue, layers);
+      .mask_layers_uniform
+      .write_buffer(&self.queue, layers);
   }
 
   fn draw_alpha_triangles(&mut self, indices: &Range<u32>, texture: &mut Self::Texture) {
@@ -198,7 +203,7 @@ impl GPUBackendImpl for WgpuImpl {
       &self.device,
       encoder,
       self.textures_bind.as_ref().unwrap(),
-      &self.mask_layers_storage,
+      &self.mask_layers_uniform,
     );
 
     self.submit()
@@ -216,7 +221,7 @@ impl GPUBackendImpl for WgpuImpl {
       &self.device,
       encoder,
       self.textures_bind.as_ref().unwrap(),
-      &self.mask_layers_storage,
+      &self.mask_layers_uniform,
     );
 
     self.submit()
@@ -246,7 +251,7 @@ impl GPUBackendImpl for WgpuImpl {
       &self.device,
       encoder,
       self.textures_bind.as_ref().unwrap(),
-      &self.mask_layers_storage,
+      &self.mask_layers_uniform,
     );
     self.submit()
   }
@@ -262,7 +267,7 @@ impl GPUBackendImpl for WgpuImpl {
       &self.device,
       encoder,
       self.textures_bind.as_ref().unwrap(),
-      &self.mask_layers_storage,
+      &self.mask_layers_uniform,
     );
     self.submit()
   }
@@ -547,7 +552,10 @@ impl WgpuImpl {
 
     let (device, queue) = adapter
       .request_device(
-        &wgpu::DeviceDescriptor { label: Some("Request device"), ..Default::default() },
+        &wgpu::DeviceDescriptor {
+          required_limits: wgpu::Limits::downlevel_webgl2_defaults(),
+          ..Default::default()
+        },
         None,
       )
       .await
@@ -569,16 +577,39 @@ impl WgpuImpl {
     let draw_tex_pass = DrawTexturePass::new(&device);
     let alpha_triangles_pass = DrawAlphaTrianglesPass::new(&device);
 
-    let mask_layers_storage = Storage::new(&device, wgpu::ShaderStages::FRAGMENT, 512);
+    let limits = device.limits();
+    let max_uniform_bytes = limits.max_uniform_buffer_binding_size as usize;
+    assert!(max_uniform_bytes >= size_of::<MaskLayer>() * MAX_MASK_LAYERS);
+    assert!(max_uniform_bytes >= size_of::<ImgPrimitive>() * MAX_IMG_PRIMS);
+    assert!(max_uniform_bytes >= size_of::<RadialGradientPrimitive>() * MAX_RADIAL_GRADIENT_PRIMS);
+    assert!(max_uniform_bytes >= size_of::<LinearGradientPrimitive>() * MAX_LINEAR_GRADIENT_PRIMS);
+    assert!(max_uniform_bytes >= size_of::<GradientStopPrimitive>() * MAX_GRADIENT_STOP_PRIMS);
+
+    let texture_size_limit = DeviceSize::new(
+      limits.max_texture_dimension_2d as i32,
+      limits.max_texture_dimension_2d as i32,
+    );
+
+    let limits = DrawPhaseLimits {
+      texture_size: texture_size_limit,
+      max_tex_load: 8,
+      max_image_primitives: MAX_IMG_PRIMS,
+      max_radial_gradient_primitives: MAX_RADIAL_GRADIENT_PRIMS,
+      max_linear_gradient_primitives: MAX_LINEAR_GRADIENT_PRIMS,
+      max_gradient_stop_primitives: MAX_GRADIENT_STOP_PRIMS,
+      max_mask_layers: MAX_MASK_LAYERS,
+    };
+
+    let mask_layers_uniform = Uniform::new(&device, wgpu::ShaderStages::FRAGMENT, MAX_MASK_LAYERS);
     let texs_layout = textures_layout(&device);
     let color_triangles_pass =
-      DrawColorTrianglesPass::new(&device, mask_layers_storage.layout(), &texs_layout);
+      DrawColorTrianglesPass::new(&device, mask_layers_uniform.layout(), &texs_layout);
     let img_triangles_pass =
-      DrawImgTrianglesPass::new(&device, mask_layers_storage.layout(), &texs_layout);
+      DrawImgTrianglesPass::new(&device, mask_layers_uniform.layout(), &texs_layout);
     let radial_gradient_pass =
-      DrawRadialGradientTrianglesPass::new(&device, mask_layers_storage.layout(), &texs_layout);
+      DrawRadialGradientTrianglesPass::new(&device, mask_layers_uniform.layout(), &texs_layout);
     let linear_gradient_pass =
-      DrawLinearGradientTrianglesPass::new(&device, mask_layers_storage.layout(), &texs_layout);
+      DrawLinearGradientTrianglesPass::new(&device, mask_layers_uniform.layout(), &texs_layout);
     WgpuImpl {
       device,
       queue,
@@ -594,7 +625,8 @@ impl WgpuImpl {
       linear_gradient_pass,
       texs_layout,
       textures_bind: None,
-      mask_layers_storage,
+      mask_layers_uniform,
+      limits,
     }
   }
 
diff --git a/gpu/src/wgpu_impl/draw_alpha_triangles_pass.rs b/gpu/src/wgpu_impl/draw_alpha_triangles_pass.rs
index fa84b3bbb..df87de8e5 100644
--- a/gpu/src/wgpu_impl/draw_alpha_triangles_pass.rs
+++ b/gpu/src/wgpu_impl/draw_alpha_triangles_pass.rs
@@ -2,6 +2,7 @@ use std::{mem::size_of, ops::Range};
 
 use ribir_geom::DeviceRect;
 use ribir_painter::{AntiAliasing, Vertex, VertexBuffers};
+use wgpu::include_wgsl;
 
 use super::vertex_buffer::VerticesBuffer;
 use crate::WgpuTexture;
@@ -16,10 +17,7 @@ pub struct DrawAlphaTrianglesPass {
 impl DrawAlphaTrianglesPass {
   pub fn new(device: &wgpu::Device) -> Self {
     let vertices_buffer = VerticesBuffer::new(2048, 4096, device);
-    let shader = device.create_shader_module(wgpu::ShaderModuleDescriptor {
-      label: Some("Alpha triangles"),
-      source: wgpu::ShaderSource::Wgsl(include_str!("./shaders/alpha_triangles.wgsl").into()),
-    });
+    let shader = device.create_shader_module(include_wgsl!("./shaders/alpha_triangles.wgsl"));
 
     Self { anti_aliasing: AntiAliasing::None, vertices_buffer, pipeline: None, shader }
   }
diff --git a/gpu/src/wgpu_impl/draw_color_triangles_pass.rs b/gpu/src/wgpu_impl/draw_color_triangles_pass.rs
index ae3bd6486..10165dfff 100644
--- a/gpu/src/wgpu_impl/draw_color_triangles_pass.rs
+++ b/gpu/src/wgpu_impl/draw_color_triangles_pass.rs
@@ -1,8 +1,9 @@
 use std::{mem::size_of, ops::Range};
 
 use ribir_painter::{Color, Vertex, VertexBuffers};
+use wgpu::include_wgsl;
 
-use super::{storage::Storage, vertex_buffer::VerticesBuffer};
+use super::{uniform::Uniform, vertex_buffer::VerticesBuffer};
 use crate::{ColorAttr, MaskLayer, WgpuTexture};
 
 pub struct DrawColorTrianglesPass {
@@ -18,10 +19,8 @@ impl DrawColorTrianglesPass {
     device: &wgpu::Device, mask_layout: &wgpu::BindGroupLayout, texs_layout: &wgpu::BindGroupLayout,
   ) -> Self {
     let vertices_buffer = VerticesBuffer::new(512, 1024, device);
-    let shader = device.create_shader_module(wgpu::ShaderModuleDescriptor {
-      label: Some("Color triangles shader"),
-      source: wgpu::ShaderSource::Wgsl(include_str!("./shaders/color_triangles.wgsl").into()),
-    });
+
+    let shader = device.create_shader_module(include_wgsl!("shaders/color_triangles.wgsl"));
     let layout = device.create_pipeline_layout(&wgpu::PipelineLayoutDescriptor {
       label: Some("update triangles pipeline layout"),
       bind_group_layouts: &[mask_layout, texs_layout],
@@ -43,7 +42,7 @@ impl DrawColorTrianglesPass {
   pub fn draw_triangles(
     &mut self, texture: &WgpuTexture, indices: Range<u32>, clear: Option<Color>,
     device: &wgpu::Device, encoder: &mut wgpu::CommandEncoder, textures_bind: &wgpu::BindGroup,
-    mask_layer_storage: &Storage<MaskLayer>,
+    mask_layer_storage: &Uniform<MaskLayer>,
   ) {
     self.update(texture.format(), device);
     let pipeline = self.pipeline.as_ref().unwrap();
diff --git a/gpu/src/wgpu_impl/draw_img_triangles_pass.rs b/gpu/src/wgpu_impl/draw_img_triangles_pass.rs
index 799916689..061e5247d 100644
--- a/gpu/src/wgpu_impl/draw_img_triangles_pass.rs
+++ b/gpu/src/wgpu_impl/draw_img_triangles_pass.rs
@@ -1,8 +1,9 @@
 use std::{mem::size_of, ops::Range};
 
 use ribir_painter::{Color, Vertex, VertexBuffers};
+use wgpu::include_wgsl;
 
-use super::{storage::Storage, vertex_buffer::VerticesBuffer};
+use super::{uniform::Uniform, vertex_buffer::VerticesBuffer, MAX_IMG_PRIMS};
 use crate::{ImagePrimIndex, ImgPrimitive, MaskLayer, WgpuTexture};
 
 pub struct DrawImgTrianglesPass {
@@ -10,7 +11,7 @@ pub struct DrawImgTrianglesPass {
   layout: wgpu::PipelineLayout,
   pipeline: Option<wgpu::RenderPipeline>,
   shader: wgpu::ShaderModule,
-  prims_storage: Storage<ImgPrimitive>,
+  prims_uniform: Uniform<ImgPrimitive>,
   format: Option<wgpu::TextureFormat>,
 }
 
@@ -18,7 +19,7 @@ impl DrawImgTrianglesPass {
   pub fn new(
     device: &wgpu::Device, mask_layout: &wgpu::BindGroupLayout, texs_layout: &wgpu::BindGroupLayout,
   ) -> Self {
-    let prims_storage = Storage::new(device, wgpu::ShaderStages::FRAGMENT, 64);
+    let prims_storage = Uniform::new(device, wgpu::ShaderStages::FRAGMENT, MAX_IMG_PRIMS);
     let layout = device.create_pipeline_layout(&wgpu::PipelineLayoutDescriptor {
       label: Some("Image pipeline layout"),
       bind_group_layouts: &[mask_layout, prims_storage.layout(), texs_layout],
@@ -26,12 +27,16 @@ impl DrawImgTrianglesPass {
     });
 
     let vertices_buffer = VerticesBuffer::new(128, 512, device);
-    let shader = device.create_shader_module(wgpu::ShaderModuleDescriptor {
-      label: Some("Image triangles shader"),
-      source: wgpu::ShaderSource::Wgsl(include_str!("./shaders/img_triangles.wgsl").into()),
-    });
+    let shader = device.create_shader_module(include_wgsl!("./shaders/img_triangles.wgsl"));
 
-    Self { vertices_buffer, layout, pipeline: None, shader, prims_storage, format: None }
+    Self {
+      vertices_buffer,
+      layout,
+      pipeline: None,
+      shader,
+      prims_uniform: prims_storage,
+      format: None,
+    }
   }
 
   pub fn load_triangles_vertices(
@@ -42,19 +47,15 @@ impl DrawImgTrianglesPass {
       .write_buffer(buffers, device, queue);
   }
 
-  pub fn load_img_primitives(
-    &mut self, device: &wgpu::Device, queue: &wgpu::Queue, primitives: &[ImgPrimitive],
-  ) {
-    self
-      .prims_storage
-      .write_buffer(device, queue, primitives);
+  pub fn load_img_primitives(&mut self, queue: &wgpu::Queue, primitives: &[ImgPrimitive]) {
+    self.prims_uniform.write_buffer(queue, primitives);
   }
 
   #[allow(clippy::too_many_arguments)]
   pub fn draw_triangles(
     &mut self, texture: &WgpuTexture, indices: Range<u32>, clear: Option<Color>,
     device: &wgpu::Device, encoder: &mut wgpu::CommandEncoder, textures_bind: &wgpu::BindGroup,
-    mask_layer_storage: &Storage<MaskLayer>,
+    mask_layer_storage: &Uniform<MaskLayer>,
   ) {
     self.update(texture.format(), device);
     let pipeline = self.pipeline.as_ref().unwrap();
@@ -70,7 +71,7 @@ impl DrawImgTrianglesPass {
     rpass.set_vertex_buffer(0, self.vertices_buffer.vertices().slice(..));
     rpass.set_index_buffer(self.vertices_buffer.indices().slice(..), wgpu::IndexFormat::Uint32);
     rpass.set_bind_group(0, mask_layer_storage.bind_group(), &[]);
-    rpass.set_bind_group(1, self.prims_storage.bind_group(), &[]);
+    rpass.set_bind_group(1, self.prims_uniform.bind_group(), &[]);
     rpass.set_bind_group(2, textures_bind, &[]);
 
     rpass.set_pipeline(pipeline);
diff --git a/gpu/src/wgpu_impl/draw_linear_gradient_pass.rs b/gpu/src/wgpu_impl/draw_linear_gradient_pass.rs
index ca62901d9..9130b0eb7 100644
--- a/gpu/src/wgpu_impl/draw_linear_gradient_pass.rs
+++ b/gpu/src/wgpu_impl/draw_linear_gradient_pass.rs
@@ -1,8 +1,12 @@
 use std::{mem::size_of, ops::Range};
 
 use ribir_painter::{Color, Vertex, VertexBuffers};
+use wgpu::include_wgsl;
 
-use super::{storage::Storage, vertex_buffer::VerticesBuffer};
+use super::{
+  uniform::Uniform, vertex_buffer::VerticesBuffer, MAX_GRADIENT_STOP_PRIMS,
+  MAX_LINEAR_GRADIENT_PRIMS,
+};
 use crate::{
   GradientStopPrimitive, LinearGradientPrimIndex, LinearGradientPrimitive, MaskLayer, WgpuTexture,
 };
@@ -12,8 +16,8 @@ pub struct DrawLinearGradientTrianglesPass {
   pipeline: Option<wgpu::RenderPipeline>,
   shader: wgpu::ShaderModule,
   format: Option<wgpu::TextureFormat>,
-  prims_storage: Storage<LinearGradientPrimitive>,
-  stops_storage: Storage<GradientStopPrimitive>,
+  prims_uniform: Uniform<LinearGradientPrimitive>,
+  stops_uniform: Uniform<GradientStopPrimitive>,
   layout: wgpu::PipelineLayout,
 }
 
@@ -22,20 +26,17 @@ impl DrawLinearGradientTrianglesPass {
     device: &wgpu::Device, mask_layout: &wgpu::BindGroupLayout, texs_layout: &wgpu::BindGroupLayout,
   ) -> Self {
     let vertices_buffer = VerticesBuffer::new(512, 1024, device);
-    let shader = device.create_shader_module(wgpu::ShaderModuleDescriptor {
-      label: Some("Linear triangles shader"),
-      source: wgpu::ShaderSource::Wgsl(
-        include_str!("./shaders/linear_gradient_triangles.wgsl").into(),
-      ),
-    });
-    let prims_storage = Storage::new(device, wgpu::ShaderStages::FRAGMENT, 64);
-    let stops_storage = Storage::new(device, wgpu::ShaderStages::FRAGMENT, 64);
+    let shader =
+      device.create_shader_module(include_wgsl!("./shaders/linear_gradient_triangles.wgsl"));
+    let prims_uniform =
+      Uniform::new(device, wgpu::ShaderStages::FRAGMENT, MAX_LINEAR_GRADIENT_PRIMS);
+    let stops_unifrom = Uniform::new(device, wgpu::ShaderStages::FRAGMENT, MAX_GRADIENT_STOP_PRIMS);
     let layout = device.create_pipeline_layout(&wgpu::PipelineLayoutDescriptor {
       label: Some("update triangles pipeline layout"),
       bind_group_layouts: &[
         mask_layout,
-        stops_storage.layout(),
-        prims_storage.layout(),
+        stops_unifrom.layout(),
+        prims_uniform.layout(),
         texs_layout,
       ],
       push_constant_ranges: &[],
@@ -45,8 +46,8 @@ impl DrawLinearGradientTrianglesPass {
       pipeline: None,
       shader,
       format: None,
-      prims_storage,
-      stops_storage,
+      prims_uniform,
+      stops_uniform: stops_unifrom,
       layout,
     }
   }
@@ -61,26 +62,20 @@ impl DrawLinearGradientTrianglesPass {
   }
 
   pub fn load_linear_gradient_primitives(
-    &mut self, device: &wgpu::Device, queue: &wgpu::Queue, primitives: &[LinearGradientPrimitive],
+    &mut self, queue: &wgpu::Queue, primitives: &[LinearGradientPrimitive],
   ) {
-    self
-      .prims_storage
-      .write_buffer(device, queue, primitives);
+    self.prims_uniform.write_buffer(queue, primitives);
   }
 
-  pub fn load_gradient_stops(
-    &mut self, device: &wgpu::Device, queue: &wgpu::Queue, stops: &[GradientStopPrimitive],
-  ) {
-    self
-      .stops_storage
-      .write_buffer(device, queue, stops);
+  pub fn load_gradient_stops(&mut self, queue: &wgpu::Queue, stops: &[GradientStopPrimitive]) {
+    self.stops_uniform.write_buffer(queue, stops);
   }
 
   #[allow(clippy::too_many_arguments)]
   pub fn draw_triangles(
     &mut self, texture: &WgpuTexture, indices: Range<u32>, clear: Option<Color>,
     device: &wgpu::Device, encoder: &mut wgpu::CommandEncoder, textures_bind: &wgpu::BindGroup,
-    mask_layer_storage: &Storage<MaskLayer>,
+    mask_layer_storage: &Uniform<MaskLayer>,
   ) {
     self.update(texture.format(), device);
     let pipeline = self.pipeline.as_ref().unwrap();
@@ -97,8 +92,8 @@ impl DrawLinearGradientTrianglesPass {
     rpass.set_vertex_buffer(0, self.vertices_buffer.vertices().slice(..));
     rpass.set_index_buffer(self.vertices_buffer.indices().slice(..), wgpu::IndexFormat::Uint32);
     rpass.set_bind_group(0, mask_layer_storage.bind_group(), &[]);
-    rpass.set_bind_group(1, self.stops_storage.bind_group(), &[]);
-    rpass.set_bind_group(2, self.prims_storage.bind_group(), &[]);
+    rpass.set_bind_group(1, self.stops_uniform.bind_group(), &[]);
+    rpass.set_bind_group(2, self.prims_uniform.bind_group(), &[]);
     rpass.set_bind_group(3, textures_bind, &[]);
 
     rpass.set_pipeline(pipeline);
diff --git a/gpu/src/wgpu_impl/draw_radial_gradient_pass.rs b/gpu/src/wgpu_impl/draw_radial_gradient_pass.rs
index 0e2583b98..54e065f5c 100644
--- a/gpu/src/wgpu_impl/draw_radial_gradient_pass.rs
+++ b/gpu/src/wgpu_impl/draw_radial_gradient_pass.rs
@@ -1,8 +1,12 @@
 use std::{mem::size_of, ops::Range};
 
 use ribir_painter::{Color, Vertex, VertexBuffers};
+use wgpu::include_wgsl;
 
-use super::{storage::Storage, vertex_buffer::VerticesBuffer};
+use super::{
+  uniform::Uniform, vertex_buffer::VerticesBuffer, MAX_GRADIENT_STOP_PRIMS,
+  MAX_RADIAL_GRADIENT_PRIMS,
+};
 use crate::{
   GradientStopPrimitive, MaskLayer, RadialGradientPrimIndex, RadialGradientPrimitive, WgpuTexture,
 };
@@ -12,8 +16,8 @@ pub struct DrawRadialGradientTrianglesPass {
   pipeline: Option<wgpu::RenderPipeline>,
   shader: wgpu::ShaderModule,
   format: Option<wgpu::TextureFormat>,
-  prims_storage: Storage<RadialGradientPrimitive>,
-  stops_storage: Storage<GradientStopPrimitive>,
+  prims_uniform: Uniform<RadialGradientPrimitive>,
+  stops_uniform: Uniform<GradientStopPrimitive>,
   layout: wgpu::PipelineLayout,
 }
 
@@ -22,14 +26,11 @@ impl DrawRadialGradientTrianglesPass {
     device: &wgpu::Device, mask_layout: &wgpu::BindGroupLayout, texs_layout: &wgpu::BindGroupLayout,
   ) -> Self {
     let vertices_buffer = VerticesBuffer::new(512, 1024, device);
-    let shader = device.create_shader_module(wgpu::ShaderModuleDescriptor {
-      label: Some("Radial triangles pass"),
-      source: wgpu::ShaderSource::Wgsl(
-        include_str!("./shaders/radial_gradient_triangles.wgsl").into(),
-      ),
-    });
-    let prims_storage = Storage::new(device, wgpu::ShaderStages::FRAGMENT, 64);
-    let stops_storage = Storage::new(device, wgpu::ShaderStages::FRAGMENT, 64);
+    let shader =
+      device.create_shader_module(include_wgsl!("./shaders/radial_gradient_triangles.wgsl"));
+    let prims_storage =
+      Uniform::new(device, wgpu::ShaderStages::FRAGMENT, MAX_RADIAL_GRADIENT_PRIMS);
+    let stops_storage = Uniform::new(device, wgpu::ShaderStages::FRAGMENT, MAX_GRADIENT_STOP_PRIMS);
     let layout = device.create_pipeline_layout(&wgpu::PipelineLayoutDescriptor {
       label: Some("update triangles pipeline layout"),
       bind_group_layouts: &[
@@ -46,8 +47,8 @@ impl DrawRadialGradientTrianglesPass {
       pipeline: None,
       shader,
       format: None,
-      prims_storage,
-      stops_storage,
+      prims_uniform: prims_storage,
+      stops_uniform: stops_storage,
       layout,
     }
   }
@@ -60,28 +61,21 @@ impl DrawRadialGradientTrianglesPass {
       .vertices_buffer
       .write_buffer(buffers, device, queue);
   }
-
   pub fn load_radial_gradient_primitives(
-    &mut self, device: &wgpu::Device, queue: &wgpu::Queue, primitives: &[RadialGradientPrimitive],
+    &mut self, queue: &wgpu::Queue, primitives: &[RadialGradientPrimitive],
   ) {
-    self
-      .prims_storage
-      .write_buffer(device, queue, primitives);
+    self.prims_uniform.write_buffer(queue, primitives);
   }
 
-  pub fn load_gradient_stops(
-    &mut self, device: &wgpu::Device, queue: &wgpu::Queue, stops: &[GradientStopPrimitive],
-  ) {
-    self
-      .stops_storage
-      .write_buffer(device, queue, stops);
+  pub fn load_gradient_stops(&mut self, queue: &wgpu::Queue, stops: &[GradientStopPrimitive]) {
+    self.stops_uniform.write_buffer(queue, stops);
   }
 
   #[allow(clippy::too_many_arguments)]
   pub fn draw_triangles(
     &mut self, texture: &WgpuTexture, indices: Range<u32>, clear: Option<Color>,
     device: &wgpu::Device, encoder: &mut wgpu::CommandEncoder, textures_bind: &wgpu::BindGroup,
-    mask_layer_storage: &Storage<MaskLayer>,
+    mask_layer_storage: &Uniform<MaskLayer>,
   ) {
     self.update(texture.format(), device);
     let pipeline = self.pipeline.as_ref().unwrap();
@@ -98,8 +92,8 @@ impl DrawRadialGradientTrianglesPass {
     rpass.set_vertex_buffer(0, self.vertices_buffer.vertices().slice(..));
     rpass.set_index_buffer(self.vertices_buffer.indices().slice(..), wgpu::IndexFormat::Uint32);
     rpass.set_bind_group(0, mask_layer_storage.bind_group(), &[]);
-    rpass.set_bind_group(1, self.stops_storage.bind_group(), &[]);
-    rpass.set_bind_group(2, self.prims_storage.bind_group(), &[]);
+    rpass.set_bind_group(1, self.stops_uniform.bind_group(), &[]);
+    rpass.set_bind_group(2, self.prims_uniform.bind_group(), &[]);
     rpass.set_bind_group(3, textures_bind, &[]);
 
     rpass.set_pipeline(pipeline);
diff --git a/gpu/src/wgpu_impl/draw_texture_pass.rs b/gpu/src/wgpu_impl/draw_texture_pass.rs
index d36d6afcc..5b610ff8b 100644
--- a/gpu/src/wgpu_impl/draw_texture_pass.rs
+++ b/gpu/src/wgpu_impl/draw_texture_pass.rs
@@ -2,7 +2,7 @@ use std::mem::size_of;
 
 use ribir_geom::{rect_corners, DevicePoint, DeviceRect, DeviceSize};
 use ribir_painter::Vertex;
-use wgpu::StoreOp;
+use wgpu::{include_wgsl, StoreOp};
 
 use super::buffer_pool::BufferPool;
 use crate::{command_encoder, gpu_backend::Texture, vertices_coord, WgpuImpl, WgpuTexture};
@@ -19,10 +19,7 @@ pub struct DrawTexturePass {
 
 impl DrawTexturePass {
   pub fn new(device: &wgpu::Device) -> Self {
-    let shader = device.create_shader_module(wgpu::ShaderModuleDescriptor {
-      label: Some("Draw texture to texture"),
-      source: wgpu::ShaderSource::Wgsl(include_str!("./shaders/tex_2_tex.wgsl").into()),
-    });
+    let shader = device.create_shader_module(include_wgsl!("./shaders/tex_2_tex.wgsl"));
 
     let bind_layout = device.create_bind_group_layout(&wgpu::BindGroupLayoutDescriptor {
       entries: &[
diff --git a/gpu/src/wgpu_impl/shaders/color_triangles.wgsl b/gpu/src/wgpu_impl/shaders/color_triangles.wgsl
index 6196f7cdf..a6d6322f3 100644
--- a/gpu/src/wgpu_impl/shaders/color_triangles.wgsl
+++ b/gpu/src/wgpu_impl/shaders/color_triangles.wgsl
@@ -23,16 +23,30 @@ fn vs_main(v: Vertex) -> FragInput {
 }
 
 
+// Because alignment differs between WebGPU and WebGL, we do not store a
+// mat3x2<f32> in the struct; we store three vec2<f32> and compose them.
 struct MaskLayer {
-  transform: mat3x2<f32>,
+  t0: vec2<f32>,
+  t1: vec2<f32>,
+  t2: vec2<f32>,
   min: vec2<f32>,
   max: vec2<f32>,
+  @align(4)
   mask_tex_idx: u32,
+  @align(4)
   prev_mask_idx: i32,
 }
 
+fn mask_matrix(mask: MaskLayer) -> mat3x2<f32> {
+  return mat3x2(
+    mask.t0, 
+    mask.t1, 
+    mask.t2
+  );
+}
+
 @group(0) @binding(0) 
-var<storage> mask_layers: array<MaskLayer>;
+var<uniform> mask_layers: array<MaskLayer, 1365>;
 
 @group(1) @binding(0)
 var s_sampler: sampler;
@@ -64,7 +78,7 @@ fn fs_main(input: FragInput) -> @location(0) vec4<f32> {
         }
         let mask = mask_layers[u32(mask_idx)];
 
-        var mask_pos = mask.transform * vec3(input.pos.xy, 1.);
+        var mask_pos = mask_matrix(mask) * vec3(input.pos.xy, 1.);
         if any(mask_pos < mask.min) || any(mask.max < mask_pos) {
             color.a = 0.;
             break;
diff --git a/gpu/src/wgpu_impl/shaders/img_triangles.wgsl b/gpu/src/wgpu_impl/shaders/img_triangles.wgsl
index 463665e52..f5dc29f19 100644
--- a/gpu/src/wgpu_impl/shaders/img_triangles.wgsl
+++ b/gpu/src/wgpu_impl/shaders/img_triangles.wgsl
@@ -3,9 +3,13 @@ struct VertexInput {
   @location(1) prim_idx: u32,
 }
 
+// Because alignment differs between WebGPU and WebGL, we do not store a
+// mat3x2<f32> in the struct; we store three vec2<f32> and compose them.
 struct ImgPrimitive {
   /// Transform a vertex position to a image texture position.
-  transform: mat3x2<f32>,
+  t0: vec2<f32>,
+  t1: vec2<f32>,
+  t2: vec2<f32>,
   /// The origin of the image placed in texture.
   img_start: vec2<f32>,
   /// The size of the image image.
@@ -24,7 +28,7 @@ struct VertexOutput {
 }
 
 @group(0) @binding(0) 
-var<storage> mask_layers: array<MaskLayer>;
+var<uniform> mask_layers: array<MaskLayer, 1365>;
 
 @vertex
 fn vs_main(v: VertexInput) -> VertexOutput {
@@ -37,18 +41,31 @@ fn vs_main(v: VertexInput) -> VertexOutput {
     return o;
 }
 
+// Because alignment differs between WebGPU and WebGL, we do not store a
+// mat3x2<f32> in the struct; we store three vec2<f32> and compose them.
 struct MaskLayer {
-  transform: mat3x2<f32>,
+  t0: vec2<f32>,
+  t1: vec2<f32>,
+  t2: vec2<f32>,
   min: vec2<f32>,
   max: vec2<f32>,
+  @align(4)
   mask_tex_idx: u32,
+  @align(4)
   prev_mask_idx: i32,
 }
 
+fn mask_matrix(mask: MaskLayer) -> mat3x2<f32> {
+  return mat3x2(mask.t0, mask.t1, mask.t2);
+}
+
+fn img_prim_matrix(img: ImgPrimitive) -> mat3x2<f32> {
+  return mat3x2(img.t0, img.t1, img.t2);
+}
 
 
 @group(1) @binding(0) 
-var<storage> primtives: array<ImgPrimitive>;
+var<uniform> primtives: array<ImgPrimitive, 1024>;
 
 @group(2) @binding(0)
 var s_sampler: sampler;
@@ -74,48 +91,48 @@ var tex_7: texture_2d<f32>;
 fn fs_main(f: VertexOutput) -> @location(0) vec4<f32> {
     let prim = primtives[f.prim_idx];
     var color: vec4<f32>;
-    let pos = prim.transform * f.pos.xyz;
+    let pos = img_prim_matrix(prim) * f.pos.xyz;
     var img_pos = pos.xy % prim.img_size + prim.img_start;
     switch abs(prim.mask_head_and_tex_idx & 0x0000FFFF) {
         case 0: {
             let img_tex_size = textureDimensions(tex_0);
             img_pos = img_pos / vec2<f32>(f32(img_tex_size.x), f32(img_tex_size.y));
-            color = textureSample(tex_0, s_sampler, img_pos);
+            color = textureSampleLevel(tex_0, s_sampler, img_pos, 0.);
         }
         case 1: {
             let img_tex_size = textureDimensions(tex_1);
             img_pos = img_pos / vec2<f32>(f32(img_tex_size.x), f32(img_tex_size.y));
-            color = textureSample(tex_1, s_sampler, img_pos);
+            color = textureSampleLevel(tex_1, s_sampler, img_pos, 0.);
         }
         case 2: {
             let img_tex_size = textureDimensions(tex_2);
             img_pos = img_pos / vec2<f32>(f32(img_tex_size.x), f32(img_tex_size.y));
-            color = textureSample(tex_2, s_sampler, img_pos);
+            color = textureSampleLevel(tex_2, s_sampler, img_pos, 0.);
         }
         case 3: {
             let img_tex_size = textureDimensions(tex_3);
             img_pos = img_pos / vec2<f32>(f32(img_tex_size.x), f32(img_tex_size.y));
-            color = textureSample(tex_3, s_sampler, img_pos);
+            color = textureSampleLevel(tex_3, s_sampler, img_pos, 0.);
         }
         case 4: {
             let img_tex_size = textureDimensions(tex_4);
             img_pos = img_pos / vec2<f32>(f32(img_tex_size.x), f32(img_tex_size.y));
-            color = textureSample(tex_4, s_sampler, img_pos);
+            color = textureSampleLevel(tex_4, s_sampler, img_pos, 0.);
         }
         case 5: {
             let img_tex_size = textureDimensions(tex_5);
             img_pos = img_pos / vec2<f32>(f32(img_tex_size.x), f32(img_tex_size.y));
-            color = textureSample(tex_5, s_sampler, img_pos);
+            color = textureSampleLevel(tex_5, s_sampler, img_pos, 0.);
         }
         case 6: {
             let img_tex_size = textureDimensions(tex_6);
             img_pos = img_pos / vec2<f32>(f32(img_tex_size.x), f32(img_tex_size.y));
-            color = textureSample(tex_6, s_sampler, img_pos);
+            color = textureSampleLevel(tex_6, s_sampler, img_pos, 0.);
         }
         case 7: {
             let img_tex_size = textureDimensions(tex_7);
             img_pos = img_pos / vec2<f32>(f32(img_tex_size.x), f32(img_tex_size.y));
-            color = textureSample(tex_7, s_sampler, img_pos);
+            color = textureSampleLevel(tex_7, s_sampler, img_pos, 0.);
         }
         default: { color = vec4<f32>(1., 0., 0., 1.); }
       };
@@ -127,7 +144,7 @@ fn fs_main(f: VertexOutput) -> @location(0) vec4<f32> {
         }
 
         let mask = mask_layers[u32(mask_idx)];
-        var mask_pos = mask.transform * vec3(f.pos.xy, 1.0);
+        var mask_pos = mask_matrix(mask) * vec3(f.pos.xy, 1.0);
         if any(mask_pos < mask.min) || any(mask.max < mask_pos) {
             color.a = 0.;
             break;
diff --git a/gpu/src/wgpu_impl/shaders/linear_gradient_triangles.wgsl b/gpu/src/wgpu_impl/shaders/linear_gradient_triangles.wgsl
index 0f5db25d1..449772b9d 100644
--- a/gpu/src/wgpu_impl/shaders/linear_gradient_triangles.wgsl
+++ b/gpu/src/wgpu_impl/shaders/linear_gradient_triangles.wgsl
@@ -18,9 +18,12 @@ fn vs_main(v: Vertex) -> FragInput {
     return input;
 }
 
-
+// Because alignment differs between WebGPU and WebGL, we do not store a
+// mat3x2<f32> in the struct; we store three vec2<f32> and compose them.
 struct MaskLayer {
-  transform: mat3x2<f32>,
+  t0: vec2<f32>,
+  t1: vec2<f32>,
+  t2: vec2<f32>,
   min: vec2<f32>,
   max: vec2<f32>,
   mask_tex_idx: u32,
@@ -40,8 +43,12 @@ struct Stop {
     offset: f32,
 }
 
+// Because alignment differs between WebGPU and WebGL, we do not store a
+// mat3x2<f32> in the struct; we store three vec2<f32> and compose them.
 struct Primitive {
-  transform: mat3x2<f32>,
+  t0: vec2<f32>,
+  t1: vec2<f32>,
+  t2: vec2<f32>,
   start_position: vec2<f32>,
   end_position: vec2<f32>,
   // A value mixed stop_start(u16) and stop_cnt(u16)
@@ -50,14 +57,23 @@ struct Primitive {
   mask_head_and_spread: i32
 }
 
+fn mask_matrix(mask: MaskLayer) -> mat3x2<f32> {
+  return mat3x2(mask.t0, mask.t1, mask.t2);
+}
+
+fn prim_matrix(img: Primitive) -> mat3x2<f32> {
+  return mat3x2(img.t0, img.t1, img.t2);
+}
+
+
 @group(0) @binding(0) 
-var<storage> mask_layers: array<MaskLayer>;
+var<uniform> mask_layers: array<MaskLayer, 1365>;
 
 @group(1) @binding(0)
-var<storage> stops: array<StopPair>;
+var<uniform> stops: array<StopPair, 256>;
 
 @group(2) @binding(0)
-var<storage> prims: array<Primitive>;
+var<uniform> prims: array<Primitive, 512>;
 
 @group(3) @binding(0)
 var s_sampler: sampler;
@@ -109,7 +125,7 @@ fn get_stop(idx: u32)  -> Stop {
 @fragment
 fn fs_main(input: FragInput) -> @location(0) vec4<f32> {
     let prim = prims[input.prim_idx];
-    let pos = prim.transform * vec3(input.pos.xy, 1.);
+    let pos = prim_matrix(prim) * vec3(input.pos.xy, 1.);
     var alpha = 1.;
     var mask_idx = prim.mask_head_and_spread >> 16;
 
@@ -119,7 +135,7 @@ fn fs_main(input: FragInput) -> @location(0) vec4<f32> {
         }
         let mask = mask_layers[u32(mask_idx)];
 
-        var mask_pos = mask.transform * vec3(input.pos.xy, 1.);
+        var mask_pos = mask_matrix(mask) * vec3(input.pos.xy, 1.);
         if any(mask_pos < mask.min) || any(mask.max < mask_pos) {
             alpha = 0.;
             break;
diff --git a/gpu/src/wgpu_impl/shaders/radial_gradient_triangles.wgsl b/gpu/src/wgpu_impl/shaders/radial_gradient_triangles.wgsl
index 411db8601..0edeffef6 100644
--- a/gpu/src/wgpu_impl/shaders/radial_gradient_triangles.wgsl
+++ b/gpu/src/wgpu_impl/shaders/radial_gradient_triangles.wgsl
@@ -18,9 +18,12 @@ fn vs_main(v: Vertex) -> FragInput {
     return input;
 }
 
-
+// Because alignment differs between WebGPU and WebGL, we do not store a
+// mat3x2<f32> in the struct; we store three vec2<f32> and compose them.
 struct MaskLayer {
-  transform: mat3x2<f32>,
+  t0: vec2<f32>,
+  t1: vec2<f32>,
+  t2: vec2<f32>,
   min: vec2<f32>,
   max: vec2<f32>,
   mask_tex_idx: u32,
@@ -40,8 +43,12 @@ struct Stop {
     offset: f32,
 }
 
+// Because alignment differs between WebGPU and WebGL, we do not store a
+// mat3x2<f32> in the struct; we store three vec2<f32> and compose them.
 struct Primitive {
-  transform: mat3x2<f32>,
+  t0: vec2<f32>,
+  t1: vec2<f32>,
+  t2: vec2<f32>,
   stop_start: u32,
   stop_cnt: u32,
   start_center: vec2<f32>,
@@ -53,13 +60,13 @@ struct Primitive {
 }
 
 @group(0) @binding(0) 
-var<storage> mask_layers: array<MaskLayer>;
+var<uniform> mask_layers: array<MaskLayer, 1365>;
 
 @group(1) @binding(0)
-var<storage> stops: array<StopPair>;
+var<uniform> stops: array<StopPair, 256>;
 
 @group(2) @binding(0)
-var<storage> prims: array<Primitive>;
+var<uniform> prims: array<Primitive, 512>;
 
 @group(3) @binding(0)
 var s_sampler: sampler;
@@ -99,10 +106,19 @@ fn get_stop(idx: u32)  -> Stop {
     }
 }
 
+fn mask_matrix(mask: MaskLayer) -> mat3x2<f32> {
+  return mat3x2(mask.t0, mask.t1, mask.t2);
+}
+
+fn prim_matrix(img: Primitive) -> mat3x2<f32> {
+  return mat3x2(img.t0, img.t1, img.t2);
+}
+
+
 @fragment
 fn fs_main(input: FragInput) -> @location(0) vec4<f32> {
     let prim = prims[input.prim_idx];
-    let pos = prim.transform * vec3(input.pos.xy, 1.);
+    let pos = prim_matrix(prim) * vec3(input.pos.xy, 1.);
 
     var alpha = 1.;
     var mask_idx = prim.mask_head;
@@ -112,7 +128,7 @@ fn fs_main(input: FragInput) -> @location(0) vec4<f32> {
         }
         let mask = mask_layers[u32(mask_idx)];
 
-        var mask_pos = mask.transform * vec3(input.pos.xy, 1.);
+        var mask_pos = mask_matrix(mask) * vec3(input.pos.xy, 1.);
         if any(mask_pos < mask.min) || any(mask.max < mask_pos) {
             alpha = 0.;
             break;
diff --git a/gpu/src/wgpu_impl/storage.rs b/gpu/src/wgpu_impl/uniform.rs
similarity index 51%
rename from gpu/src/wgpu_impl/storage.rs
rename to gpu/src/wgpu_impl/uniform.rs
index 197329d8e..5ddd57bec 100644
--- a/gpu/src/wgpu_impl/storage.rs
+++ b/gpu/src/wgpu_impl/uniform.rs
@@ -1,65 +1,52 @@
-use std::{any::type_name, marker::PhantomData, mem::size_of};
+use std::{any::type_name, marker::PhantomData};
 
 use zerocopy::AsBytes;
 
-pub struct Storage<T> {
+pub struct Uniform<T> {
   layout: wgpu::BindGroupLayout,
   buffer: wgpu::Buffer,
   bind: wgpu::BindGroup,
   _phantom: PhantomData<T>,
 }
 
-impl<T: AsBytes> Storage<T> {
-  pub fn new(device: &wgpu::Device, visibility: wgpu::ShaderStages, init_count: usize) -> Self {
+impl<T: AsBytes> Uniform<T> {
+  pub fn new(device: &wgpu::Device, visibility: wgpu::ShaderStages, elements: usize) -> Self {
     let layout = device.create_bind_group_layout(&wgpu::BindGroupLayoutDescriptor {
       entries: &[wgpu::BindGroupLayoutEntry {
         binding: 0,
         visibility,
         ty: wgpu::BindingType::Buffer {
-          ty: wgpu::BufferBindingType::Storage { read_only: true },
+          ty: wgpu::BufferBindingType::Uniform,
           has_dynamic_offset: false,
           min_binding_size: None,
         },
         count: None,
       }],
-      label: Some(&format!("{} storage layout", type_name::<T>())),
+      label: Some(&format!("{} uniform layout", type_name::<T>())),
     });
-    let (buffer, bind) =
-      Self::new_bind(device, &layout, (size_of::<T>() * init_count) as wgpu::BufferAddress);
-    let _phantom = PhantomData;
-    Self { layout, buffer, bind, _phantom }
-  }
-
-  pub fn write_buffer(&mut self, device: &wgpu::Device, queue: &wgpu::Queue, data: &[T]) {
-    let buffer_size = (std::mem::size_of_val(data)) as wgpu::BufferAddress;
-    if self.buffer.size() < buffer_size {
-      (self.buffer, self.bind) = Self::new_bind(device, &self.layout, buffer_size);
-    }
-
-    queue.write_buffer(&self.buffer, 0, data.as_bytes());
-  }
-
-  pub fn bind_group(&self) -> &wgpu::BindGroup { &self.bind }
-  pub fn layout(&self) -> &wgpu::BindGroupLayout { &self.layout }
-
-  fn new_bind(
-    device: &wgpu::Device, layout: &wgpu::BindGroupLayout, bytes: wgpu::BufferAddress,
-  ) -> (wgpu::Buffer, wgpu::BindGroup) {
     let buffer = device.create_buffer(&wgpu::BufferDescriptor {
-      label: Some(&format!("{} storage buffer", type_name::<T>())),
-      size: bytes,
-      usage: wgpu::BufferUsages::STORAGE | wgpu::BufferUsages::COPY_DST,
+      label: Some(&format!("{} uniform buffer", type_name::<T>())),
+      size: (std::mem::size_of::<T>() * elements) as u64,
+      usage: wgpu::BufferUsages::UNIFORM | wgpu::BufferUsages::COPY_DST,
       mapped_at_creation: false,
     });
     let bind = device.create_bind_group(&wgpu::BindGroupDescriptor {
-      label: Some(&format!("{} storage bind", type_name::<T>())),
-      layout,
+      label: Some(&format!("{} uniform bind", type_name::<T>())),
+      layout: &layout,
       entries: &[wgpu::BindGroupEntry {
         binding: 0,
         resource: wgpu::BindingResource::Buffer(buffer.as_entire_buffer_binding()),
       }],
     });
 
-    (buffer, bind)
+    let _phantom = PhantomData;
+    Self { layout, buffer, bind, _phantom }
   }
+
+  pub fn write_buffer(&mut self, queue: &wgpu::Queue, data: &[T]) {
+    queue.write_buffer(&self.buffer, 0, data.as_bytes());
+  }
+
+  pub fn bind_group(&self) -> &wgpu::BindGroup { &self.bind }
+  pub fn layout(&self) -> &wgpu::BindGroupLayout { &self.layout }
 }

From 901c46b67651cf4fee0adc094ce921fb1cb4d291 Mon Sep 17 00:00:00 2001
From: Adoo <Adoo@outlook.com>
Date: Wed, 15 May 2024 21:59:49 +0800
Subject: [PATCH 4/6] =?UTF-8?q?fix(gpu):=20=F0=9F=90=9B=20limit=20the=20al?=
 =?UTF-8?q?pha=20cache=20less=20than=20the=20gpu=20texture=20size=20limit?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 gpu/src/gpu_backend.rs              | 26 +++++++-------
 gpu/src/gpu_backend/atlas.rs        |  3 ++
 gpu/src/gpu_backend/textures_mgr.rs | 54 ++++++++++++++++-------------
 3 files changed, 47 insertions(+), 36 deletions(-)

diff --git a/gpu/src/gpu_backend.rs b/gpu/src/gpu_backend.rs
index 0812bdd56..5dafb726f 100644
--- a/gpu/src/gpu_backend.rs
+++ b/gpu/src/gpu_backend.rs
@@ -381,18 +381,20 @@ where
   fn new_mask_layer(&mut self, path: PaintPath) -> Option<([Point; 4], i32)> {
     let paint_bounds = path.paint_bounds.round_out().to_i32().cast_unit();
     let view = paint_bounds.intersection(self.viewport())?;
-    let prefer_cache_size = prefer_cache_size(&path.path, &path.transform);
-
-    let (mask, mask_to_view) =
-      if self.tex_mgr.is_good_for_cache(prefer_cache_size) || view.contains_rect(&paint_bounds) {
-        self
-          .tex_mgr
-          .store_alpha_path(path.path, &path.transform, &mut self.gpu_impl)
-      } else {
-        self
-          .tex_mgr
-          .store_clipped_path(view, path, &mut self.gpu_impl)
-      };
+
+    let (mask, mask_to_view) = if self
+      .tex_mgr
+      .is_good_for_cache(path.paint_bounds.size.to_i32().cast_unit())
+      || view.contains_rect(&paint_bounds)
+    {
+      self
+        .tex_mgr
+        .store_alpha_path(path.path, &path.transform, &mut self.gpu_impl)
+    } else {
+      self
+        .tex_mgr
+        .store_clipped_path(view, path, &mut self.gpu_impl)
+    };
 
     let mut points = rect_corners(&mask.rect.to_f32().cast_unit());
     for p in points.iter_mut() {
diff --git a/gpu/src/gpu_backend/atlas.rs b/gpu/src/gpu_backend/atlas.rs
index 88183d631..c9dc4b192 100644
--- a/gpu/src/gpu_backend/atlas.rs
+++ b/gpu/src/gpu_backend/atlas.rs
@@ -158,6 +158,9 @@ where
 
   pub fn size(&self) -> DeviceSize { self.texture.size() }
 
+  /// The maximum size the atlas can be.
+  pub fn max_size(&self) -> DeviceSize { self.config.max_size }
+
   pub fn clear(&mut self) {
     self.cache.clear();
     self.atlas_allocator.clear();
diff --git a/gpu/src/gpu_backend/textures_mgr.rs b/gpu/src/gpu_backend/textures_mgr.rs
index b2559d7d6..25e4145ec 100644
--- a/gpu/src/gpu_backend/textures_mgr.rs
+++ b/gpu/src/gpu_backend/textures_mgr.rs
@@ -65,9 +65,17 @@ macro_rules! id_to_texture {
   };
 }
 
-fn get_transform_pref_scale(transform: &Transform) -> f32 {
+fn get_prefer_scale(transform: &Transform, size: DeviceSize, max_size: DeviceSize) -> f32 {
+  // Reserve BLANK_EDGE on each side, i.e. 2 * BLANK_EDGE per dimension.
+  let max_width = (max_size.width - 2 * BLANK_EDGE) as f32;
+  let max_height = (max_size.height - 2 * BLANK_EDGE) as f32;
+
+  let max_scale = (max_width / size.width as f32).min(max_height / size.height as f32);
+
   let Transform { m11, m12, m21, m22, .. } = *transform;
-  (m11.abs() + m12.abs()).max(m21.abs() + m22.abs())
+  (m11.abs() + m12.abs())
+    .max(m21.abs() + m22.abs())
+    .min(max_scale)
 }
 
 impl<T: Texture> TexturesMgr<T>
@@ -127,7 +135,11 @@ where
         .then(path_ts)
     }
 
-    let prefer_scale: f32 = get_transform_pref_scale(transform);
+    let prefer_scale: f32 = get_prefer_scale(
+      transform,
+      path.bounds().size.to_i32().cast_unit(),
+      self.alpha_atlas.max_size(),
+    );
     let key = PathKey::from_path(path);
 
     if let Some(h) = self
@@ -142,7 +154,7 @@ where
     } else {
       let path = key.path().clone();
       let scale_bounds = path.bounds().scale(prefer_scale, prefer_scale);
-      let prefer_cache_size = path_add_edges(scale_bounds.round_out().size.to_i32().cast_unit());
+      let prefer_cache_size = add_blank_edges(scale_bounds.round_out().size.to_i32().cast_unit());
 
       let h = self
         .alpha_atlas
@@ -174,7 +186,7 @@ where
   pub(super) fn store_clipped_path(
     &mut self, clip_view: DeviceRect, path: PaintPath, gpu_impl: &mut T::Host,
   ) -> (TextureSlice, Transform) {
-    let alloc_size: DeviceSize = path_add_edges(clip_view.size);
+    let alloc_size: DeviceSize = add_blank_edges(clip_view.size);
     let path_ts = path.transform;
 
     let key = PathKey::from_path_with_clip(path, clip_view);
@@ -229,7 +241,7 @@ where
 
   fn fill_tess(
     path: &Path, ts: &Transform, tex_size: DeviceSize, slice_bounds: &DeviceRect,
-    buffer: &mut VertexBuffers<f32>,
+    buffer: &mut VertexBuffers<f32>, max_size: DeviceSize,
   ) -> Range<u32> {
     let start = buffer.indices.len() as u32;
 
@@ -239,7 +251,7 @@ where
     let tex_width = tex_size.width as f32;
     let tex_height = tex_size.height as f32;
 
-    let scale = get_transform_pref_scale(ts);
+    let scale = get_prefer_scale(ts, tex_size, max_size);
 
     path.tessellate(TOLERANCE / scale, buffer, |pos| {
       let pos = ts.transform_point(pos);
@@ -270,8 +282,14 @@ where
       for f in self.fill_task.iter() {
         let FillTask { slice, path, clip_rect, ts } = f;
         let texture = id_to_texture!(self, slice.tex_id);
-        let rg =
-          Self::fill_tess(path, ts, texture.size(), &slice.rect, &mut self.fill_task_buffers);
+        let rg = Self::fill_tess(
+          path,
+          ts,
+          texture.size(),
+          &slice.rect,
+          &mut self.fill_task_buffers,
+          self.alpha_atlas.max_size(),
+        );
         draw_indices.push((slice.tex_id, rg, clip_rect));
       }
     } else {
@@ -281,14 +299,14 @@ where
         let texture = id_to_texture!(self, slice.tex_id);
         tasks.push((slice, ts, texture.size(), slice.rect, path, clip_rect));
       }
-
+      let max_size = self.alpha_atlas.max_size();
       let par_tess_res = tasks
         .par_chunks(PAR_CHUNKS_SIZE)
         .map(|tasks| {
           let mut buffer = VertexBuffers::default();
           let mut indices = Vec::with_capacity(tasks.len());
           for (slice, ts, tex_size, slice_bounds, path, clip_rect) in tasks.iter() {
-            let rg = Self::fill_tess(path, ts, *tex_size, slice_bounds, &mut buffer);
+            let rg = Self::fill_tess(path, ts, *tex_size, slice_bounds, &mut buffer, max_size);
             indices.push((slice.tex_id, rg, *clip_rect));
           }
           (indices, buffer)
@@ -389,7 +407,7 @@ fn extend_buffer<V>(dist: &mut VertexBuffers<V>, from: VertexBuffers<V>) {
 
 const BLANK_EDGE: i32 = 2;
 
-fn path_add_edges(mut size: DeviceSize) -> DeviceSize {
+fn add_blank_edges(mut size: DeviceSize) -> DeviceSize {
   size.width += BLANK_EDGE * 2;
   size.height += BLANK_EDGE * 2;
   size
@@ -530,18 +548,6 @@ impl PartialEq for PathKey {
 
 impl Eq for PathKey {}
 
-pub fn prefer_cache_size(path: &Path, transform: &Transform) -> DeviceSize {
-  let prefer_scale: f32 = get_transform_pref_scale(transform);
-  let prefer_cache_size = path
-    .bounds()
-    .scale(prefer_scale, prefer_scale)
-    .round_out()
-    .size
-    .to_i32()
-    .cast_unit();
-  path_add_edges(prefer_cache_size)
-}
-
 #[cfg(feature = "wgpu")]
 #[cfg(test)]
 pub mod tests {

From 4703526eba191f7fd4991790cda5a547f84a6438 Mon Sep 17 00:00:00 2001
From: Adoo <Adoo@outlook.com>
Date: Thu, 16 May 2024 16:07:16 +0800
Subject: [PATCH 5/6] =?UTF-8?q?fix(gpu):=20=F0=9F=90=9B=20To=20ensure=20su?=
 =?UTF-8?q?ccessful=20device=20and=20surface=20creation=20with=20a=20corre?=
 =?UTF-8?q?ct=20configuration?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 gpu/src/wgpu_impl.rs               | 130 ++++++++++++++++++++++-------
 ribir/src/backends/wgpu_backend.rs |  59 +++----------
 2 files changed, 109 insertions(+), 80 deletions(-)

diff --git a/gpu/src/wgpu_impl.rs b/gpu/src/wgpu_impl.rs
index 0d2ccab35..fb16f8648 100644
--- a/gpu/src/wgpu_impl.rs
+++ b/gpu/src/wgpu_impl.rs
@@ -69,6 +69,12 @@ macro_rules! command_encoder {
 }
 pub(crate) use command_encoder;
 
+pub struct Surface<'a> {
+  surface: wgpu::Surface<'a>,
+  config: wgpu::SurfaceConfiguration,
+  current_texture: Option<WgpuTexture>,
+}
+
 impl GPUBackendImpl for WgpuImpl {
   type Texture = WgpuTexture;
 
@@ -303,6 +309,38 @@ impl GPUBackendImpl for WgpuImpl {
   }
 }
 
+impl<'a> Surface<'a> {
+  /// Resize the surface to the given size.
+  pub fn resize(&mut self, size: DeviceSize, backend: &WgpuImpl) {
+    self.config.width = size.width as u32;
+    self.config.height = size.height as u32;
+    if !size.is_empty() {
+      self
+        .surface
+        .configure(backend.device(), &self.config);
+    }
+  }
+
+  /// Get the size of the surface.
+  pub fn size(&self) -> DeviceSize {
+    DeviceSize::new(self.config.width as i32, self.config.height as i32)
+  }
+
+  pub fn get_current_texture(&mut self) -> &mut WgpuTexture {
+    self.current_texture.get_or_insert_with(|| {
+      let tex = self.surface.get_current_texture().unwrap();
+      WgpuTexture::new(InnerTexture::SurfaceTexture(tex))
+    })
+  }
+
+  /// Present the current texture to the surface.
+  pub fn present(&mut self) {
+    if let Some(tex) = self.current_texture.take() {
+      let InnerTexture::SurfaceTexture(tex) = tex.inner_tex else { unreachable!() };
+      tex.present()
+    }
+  }
+}
 pub struct WgpuTexture {
   inner_tex: InnerTexture,
   view: wgpu::TextureView,
@@ -326,25 +364,7 @@ impl InnerTexture {
 }
 
 impl WgpuTexture {
-  pub fn from_tex(tex: wgpu::Texture) -> Self { Self::new(InnerTexture::Texture(tex)) }
-
-  pub fn from_surface_tex(tex: wgpu::SurfaceTexture) -> Self {
-    Self::new(InnerTexture::SurfaceTexture(tex))
-  }
-
-  pub fn into_texture(self) -> Option<wgpu::Texture> {
-    match self.inner_tex {
-      InnerTexture::Texture(texture) => Some(texture),
-      InnerTexture::SurfaceTexture(_) => None,
-    }
-  }
-
-  pub fn into_surface_texture(self) -> Option<wgpu::SurfaceTexture> {
-    match self.inner_tex {
-      InnerTexture::Texture(_) => None,
-      InnerTexture::SurfaceTexture(tex) => Some(tex),
-    }
-  }
+  fn from_tex(tex: wgpu::Texture) -> Self { Self::new(InnerTexture::Texture(tex)) }
 
   pub(crate) fn color_attachments(&self, clear: Option<Color>) -> wgpu::RenderPassColorAttachment {
     let load = match clear {
@@ -535,16 +555,43 @@ impl Texture for WgpuTexture {
 }
 
 impl WgpuImpl {
-  pub async fn headless() -> Self {
-    let instance = wgpu::Instance::new(<_>::default());
-    Self::new(instance, None).await
+  /// Create a headless instance of `WgpuImpl`, i.e. one without a surface.
+  pub async fn headless() -> Self { Self::create(None).await.0 }
+
+  /// Create a new instance of `WgpuImpl` with a surface target and also return
+  /// the surface.
+  pub async fn new<'a>(target: impl Into<wgpu::SurfaceTarget<'a>>) -> (Self, Surface<'a>) {
+    let (gpu_impl, surface) = Self::create(Some(target.into())).await;
+    (gpu_impl, surface.unwrap())
   }
 
-  pub async fn new(instance: wgpu::Instance, surface: Option<&wgpu::Surface<'_>>) -> WgpuImpl {
+  #[allow(clippy::needless_lifetimes)]
+  async fn create<'a>(target: Option<wgpu::SurfaceTarget<'a>>) -> (WgpuImpl, Option<Surface<'a>>) {
+    let mut instance = wgpu::Instance::new(wgpu::InstanceDescriptor {
+      backends: wgpu::Backends::PRIMARY,
+      ..<_>::default()
+    });
+
+    // This detection mechanism might be deprecated in the future. Ideally, we
+    // should be able to create instances with `wgpu::Backends::all()`. However,
+    // that may not currently work correctly in browsers when WebGPU support is
+    // insufficient. See https://github.com/gfx-rs/wgpu/issues/5332 for details.
+    if instance
+      .request_adapter(&<_>::default())
+      .await
+      .is_none()
+    {
+      instance = wgpu::Instance::new(wgpu::InstanceDescriptor {
+        backends: wgpu::Backends::SECONDARY,
+        ..<_>::default()
+      });
+    }
+
+    let surface = target.map(|t| instance.create_surface(t).unwrap());
     let adapter = instance
       .request_adapter(&wgpu::RequestAdapterOptions {
         power_preference: wgpu::PowerPreference::default(),
-        compatible_surface: surface,
+        compatible_surface: surface.as_ref(),
         force_fallback_adapter: false,
       })
       .await
@@ -552,10 +599,7 @@ impl WgpuImpl {
 
     let (device, queue) = adapter
       .request_device(
-        &wgpu::DeviceDescriptor {
-          required_limits: wgpu::Limits::downlevel_webgl2_defaults(),
-          ..Default::default()
-        },
+        &wgpu::DeviceDescriptor { required_limits: adapter.limits(), ..Default::default() },
         None,
       )
       .await
@@ -610,13 +654,12 @@ impl WgpuImpl {
       DrawRadialGradientTrianglesPass::new(&device, mask_layers_uniform.layout(), &texs_layout);
     let linear_gradient_pass =
       DrawLinearGradientTrianglesPass::new(&device, mask_layers_uniform.layout(), &texs_layout);
-    WgpuImpl {
+    let gpu_impl = WgpuImpl {
       device,
       queue,
       command_encoder: None,
       command_buffers: vec![],
       sampler,
-
       draw_tex_pass,
       alpha_triangles_pass,
       color_triangles_pass,
@@ -627,7 +670,32 @@ impl WgpuImpl {
       textures_bind: None,
       mask_layers_uniform,
       limits,
-    }
+    };
+
+    let surface = surface.map(|surface| {
+      use wgpu::TextureFormat::*;
+      let format = surface
+        .get_capabilities(&adapter)
+        .formats
+        .into_iter()
+        .find(|&f| f == Rgba8Unorm || f == Bgra8Unorm)
+        .expect("No suitable format found for the surface!");
+
+      let config = wgpu::SurfaceConfiguration {
+        usage: wgpu::TextureUsages::RENDER_ATTACHMENT,
+        format,
+        width: 0,
+        height: 0,
+        present_mode: wgpu::PresentMode::Fifo,
+        alpha_mode: wgpu::CompositeAlphaMode::Auto,
+        view_formats: vec![format],
+        desired_maximum_frame_latency: 2,
+      };
+
+      Surface { surface, config, current_texture: None }
+    });
+
+    (gpu_impl, surface)
   }
 
   pub fn start_capture(&self) { self.device.start_capture(); }
diff --git a/ribir/src/backends/wgpu_backend.rs b/ribir/src/backends/wgpu_backend.rs
index 6087f2328..deaeb442a 100644
--- a/ribir/src/backends/wgpu_backend.rs
+++ b/ribir/src/backends/wgpu_backend.rs
@@ -1,83 +1,44 @@
 use ribir_core::prelude::{
   AntiAliasing, Color, DeviceRect, DeviceSize, PaintCommand, PainterBackend,
 };
-use ribir_gpu::WgpuTexture;
+use ribir_gpu::Surface;
 
 use crate::winit_shell_wnd::WinitBackend;
 
 pub struct WgpuBackend<'a> {
-  size: DeviceSize,
-  surface: wgpu::Surface<'a>,
+  surface: Surface<'a>,
   backend: ribir_gpu::GPUBackend<ribir_gpu::WgpuImpl>,
-  current_texture: Option<ribir_gpu::WgpuTexture>,
 }
 
 impl<'a> WinitBackend<'a> for WgpuBackend<'a> {
   async fn new(window: &'a winit::window::Window) -> WgpuBackend<'a> {
-    let instance = wgpu::Instance::new(<_>::default());
-    let surface = instance.create_surface(window).unwrap();
-    let wgpu = ribir_gpu::WgpuImpl::new(instance, Some(&surface)).await;
+    let (wgpu, surface) = ribir_gpu::WgpuImpl::new(window).await;
     let size = window.inner_size();
     let size = DeviceSize::new(size.width as i32, size.height as i32);
 
-    let mut wgpu = WgpuBackend {
-      size: DeviceSize::zero(),
-      surface,
-      backend: ribir_gpu::GPUBackend::new(wgpu, AntiAliasing::Msaa4X),
-      current_texture: None,
-    };
+    let mut wgpu =
+      WgpuBackend { surface, backend: ribir_gpu::GPUBackend::new(wgpu, AntiAliasing::Msaa4X) };
     wgpu.on_resize(size);
 
     wgpu
   }
 
   fn on_resize(&mut self, size: DeviceSize) {
-    if !size.is_empty() && size != self.size {
-      self.size = size;
-      self.surface.configure(
-        self.backend.get_impl().device(),
-        &Self::surface_config(size.width as u32, size.height as u32),
-      );
+    if size != self.surface.size() {
+      self.surface.resize(size, self.backend.get_impl());
     }
   }
 
-  fn begin_frame(&mut self, surface_color: Color) {
-    self.backend.begin_frame(surface_color);
-    assert!(self.current_texture.is_none());
-    let surface_tex = self.surface.get_current_texture().unwrap();
-    self.current_texture = Some(WgpuTexture::from_surface_tex(surface_tex));
-  }
+  fn begin_frame(&mut self, surface_color: Color) { self.backend.begin_frame(surface_color); }
 
   fn draw_commands(&mut self, viewport: DeviceRect, commands: Vec<PaintCommand>) {
-    let surface = self.current_texture.as_mut().unwrap();
     self
       .backend
-      .draw_commands(viewport, commands, surface);
+      .draw_commands(viewport, commands, self.surface.get_current_texture());
   }
 
   fn end_frame(&mut self) {
     self.backend.end_frame();
-    let surface = self
-      .current_texture
-      .take()
-      .unwrap()
-      .into_surface_texture()
-      .unwrap();
-    surface.present();
-  }
-}
-
-impl<'a> WgpuBackend<'a> {
-  fn surface_config(width: u32, height: u32) -> wgpu::SurfaceConfiguration {
-    wgpu::SurfaceConfiguration {
-      usage: wgpu::TextureUsages::RENDER_ATTACHMENT,
-      format: wgpu::TextureFormat::Bgra8Unorm,
-      width,
-      height,
-      present_mode: wgpu::PresentMode::Fifo,
-      alpha_mode: wgpu::CompositeAlphaMode::Auto,
-      view_formats: vec![wgpu::TextureFormat::Bgra8Unorm],
-      desired_maximum_frame_latency: 2,
-    }
+    self.surface.present();
   }
 }

From 499051c0a429475f885060e04d4ad51fa6ccbb88 Mon Sep 17 00:00:00 2001
From: Adoo <Adoo@outlook.com>
Date: Fri, 17 May 2024 23:00:51 +0800
Subject: [PATCH 6/6] =?UTF-8?q?refactor(gpu):=20=F0=9F=92=A1=20lazy=20crea?=
 =?UTF-8?q?te=20triangles=20pass?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 gpu/src/wgpu_impl.rs | 124 +++++++++++++++++++++++++++----------------
 1 file changed, 77 insertions(+), 47 deletions(-)

diff --git a/gpu/src/wgpu_impl.rs b/gpu/src/wgpu_impl.rs
index fb16f8648..008428d1c 100644
--- a/gpu/src/wgpu_impl.rs
+++ b/gpu/src/wgpu_impl.rs
@@ -48,10 +48,10 @@ pub struct WgpuImpl {
   sampler: wgpu::Sampler,
   draw_tex_pass: DrawTexturePass,
   alpha_triangles_pass: DrawAlphaTrianglesPass,
-  color_triangles_pass: DrawColorTrianglesPass,
-  img_triangles_pass: DrawImgTrianglesPass,
-  radial_gradient_pass: DrawRadialGradientTrianglesPass,
-  linear_gradient_pass: DrawLinearGradientTrianglesPass,
+  color_triangles_pass: Option<DrawColorTrianglesPass>,
+  img_triangles_pass: Option<DrawImgTrianglesPass>,
+  radial_gradient_pass: Option<DrawRadialGradientTrianglesPass>,
+  linear_gradient_pass: Option<DrawLinearGradientTrianglesPass>,
   texs_layout: wgpu::BindGroupLayout,
   textures_bind: Option<wgpu::BindGroup>,
   mask_layers_uniform: Uniform<MaskLayer>,
@@ -67,6 +67,62 @@ macro_rules! command_encoder {
     })
   };
 }
+macro_rules! color_pass {
+  ($backend:ident) => {
+    $backend
+      .color_triangles_pass
+      .get_or_insert_with(|| {
+        DrawColorTrianglesPass::new(
+          &$backend.device,
+          $backend.mask_layers_uniform.layout(),
+          &$backend.texs_layout,
+        )
+      })
+  };
+}
+
+macro_rules! img_pass {
+  ($backend:ident) => {
+    $backend
+      .img_triangles_pass
+      .get_or_insert_with(|| {
+        DrawImgTrianglesPass::new(
+          &$backend.device,
+          $backend.mask_layers_uniform.layout(),
+          &$backend.texs_layout,
+        )
+      })
+  };
+}
+
+macro_rules! radial_gradient_pass {
+  ($backend:ident) => {
+    $backend
+      .radial_gradient_pass
+      .get_or_insert_with(|| {
+        DrawRadialGradientTrianglesPass::new(
+          &$backend.device,
+          $backend.mask_layers_uniform.layout(),
+          &$backend.texs_layout,
+        )
+      })
+  };
+}
+
+macro_rules! linear_gradient_pass {
+  ($backend:ident) => {
+    $backend
+      .linear_gradient_pass
+      .get_or_insert_with(|| {
+        DrawLinearGradientTrianglesPass::new(
+          &$backend.device,
+          $backend.mask_layers_uniform.layout(),
+          &$backend.texs_layout,
+        )
+      })
+  };
+}
+
 pub(crate) use command_encoder;
 
 pub struct Surface<'a> {
@@ -131,57 +187,39 @@ impl GPUBackendImpl for WgpuImpl {
   }
 
   fn load_color_vertices(&mut self, buffers: &VertexBuffers<ColorAttr>) {
-    self
-      .color_triangles_pass
-      .load_triangles_vertices(buffers, &self.device, &self.queue);
+    color_pass!(self).load_triangles_vertices(buffers, &self.device, &self.queue);
   }
 
   fn load_img_vertices(&mut self, buffers: &VertexBuffers<ImagePrimIndex>) {
-    self
-      .img_triangles_pass
-      .load_triangles_vertices(buffers, &self.device, &self.queue);
+    img_pass!(self).load_triangles_vertices(buffers, &self.device, &self.queue);
   }
 
   fn load_img_primitives(&mut self, primitives: &[ImgPrimitive]) {
-    self
-      .img_triangles_pass
-      .load_img_primitives(&self.queue, primitives);
+    img_pass!(self).load_img_primitives(&self.queue, primitives);
   }
 
   fn load_radial_gradient_primitives(&mut self, primitives: &[RadialGradientPrimitive]) {
-    self
-      .radial_gradient_pass
-      .load_radial_gradient_primitives(&self.queue, primitives);
+    radial_gradient_pass!(self).load_radial_gradient_primitives(&self.queue, primitives);
   }
 
   fn load_radial_gradient_stops(&mut self, stops: &[GradientStopPrimitive]) {
-    self
-      .radial_gradient_pass
-      .load_gradient_stops(&self.queue, stops);
+    radial_gradient_pass!(self).load_gradient_stops(&self.queue, stops);
   }
 
   fn load_radial_gradient_vertices(&mut self, buffers: &VertexBuffers<RadialGradientPrimIndex>) {
-    self
-      .radial_gradient_pass
-      .load_triangles_vertices(buffers, &self.device, &self.queue);
+    radial_gradient_pass!(self).load_triangles_vertices(buffers, &self.device, &self.queue);
   }
 
   fn load_linear_gradient_primitives(&mut self, primitives: &[LinearGradientPrimitive]) {
-    self
-      .linear_gradient_pass
-      .load_linear_gradient_primitives(&self.queue, primitives);
+    linear_gradient_pass!(self).load_linear_gradient_primitives(&self.queue, primitives);
   }
 
   fn load_linear_gradient_stops(&mut self, stops: &[GradientStopPrimitive]) {
-    self
-      .linear_gradient_pass
-      .load_gradient_stops(&self.queue, stops);
+    linear_gradient_pass!(self).load_gradient_stops(&self.queue, stops);
   }
 
   fn load_linear_gradient_vertices(&mut self, buffers: &VertexBuffers<LinearGradientPrimIndex>) {
-    self
-      .linear_gradient_pass
-      .load_triangles_vertices(buffers, &self.device, &self.queue);
+    linear_gradient_pass!(self).load_triangles_vertices(buffers, &self.device, &self.queue);
   }
 
   fn load_mask_layers(&mut self, layers: &[crate::MaskLayer]) {
@@ -202,7 +240,7 @@ impl GPUBackendImpl for WgpuImpl {
   ) {
     let encoder = command_encoder!(self);
 
-    self.radial_gradient_pass.draw_triangles(
+    radial_gradient_pass!(self).draw_triangles(
       texture,
       indices,
       clear,
@@ -220,7 +258,7 @@ impl GPUBackendImpl for WgpuImpl {
   ) {
     let encoder = command_encoder!(self);
 
-    self.linear_gradient_pass.draw_triangles(
+    linear_gradient_pass!(self).draw_triangles(
       texture,
       indices,
       clear,
@@ -250,7 +288,7 @@ impl GPUBackendImpl for WgpuImpl {
     &mut self, texture: &mut Self::Texture, indices: Range<u32>, clear: Option<Color>,
   ) {
     let encoder = command_encoder!(self);
-    self.color_triangles_pass.draw_triangles(
+    color_pass!(self).draw_triangles(
       texture,
       indices,
       clear,
@@ -266,7 +304,7 @@ impl GPUBackendImpl for WgpuImpl {
     &mut self, texture: &mut Self::Texture, indices: Range<u32>, clear: Option<Color>,
   ) {
     let encoder = command_encoder!(self);
-    self.img_triangles_pass.draw_triangles(
+    img_pass!(self).draw_triangles(
       texture,
       indices,
       clear,
@@ -646,14 +684,6 @@ impl WgpuImpl {
 
     let mask_layers_uniform = Uniform::new(&device, wgpu::ShaderStages::FRAGMENT, MAX_MASK_LAYERS);
     let texs_layout = textures_layout(&device);
-    let color_triangles_pass =
-      DrawColorTrianglesPass::new(&device, mask_layers_uniform.layout(), &texs_layout);
-    let img_triangles_pass =
-      DrawImgTrianglesPass::new(&device, mask_layers_uniform.layout(), &texs_layout);
-    let radial_gradient_pass =
-      DrawRadialGradientTrianglesPass::new(&device, mask_layers_uniform.layout(), &texs_layout);
-    let linear_gradient_pass =
-      DrawLinearGradientTrianglesPass::new(&device, mask_layers_uniform.layout(), &texs_layout);
     let gpu_impl = WgpuImpl {
       device,
       queue,
@@ -662,10 +692,10 @@ impl WgpuImpl {
       sampler,
       draw_tex_pass,
       alpha_triangles_pass,
-      color_triangles_pass,
-      img_triangles_pass,
-      radial_gradient_pass,
-      linear_gradient_pass,
+      color_triangles_pass: None,
+      img_triangles_pass: None,
+      radial_gradient_pass: None,
+      linear_gradient_pass: None,
       texs_layout,
       textures_bind: None,
       mask_layers_uniform,