diff --git a/src/ctx.rs b/src/ctx.rs index be22f7e27..8eb04fdd7 100644 --- a/src/ctx.rs +++ b/src/ctx.rs @@ -52,38 +52,55 @@ use std::iter::zip; /// This optimizes for the common cases where `buf.len()` is a small power of 2, /// where the array write is optimized as few and large stores as possible. #[inline] -pub fn small_memset( +pub fn small_memset( buf: &mut [T], val: T, ) { fn as_array(buf: &mut [T]) -> &mut [T; N] { buf.try_into().unwrap() } - match buf.len() { - 01 if UP_TO >= 01 => *as_array(buf) = [val; 01], - 02 if UP_TO >= 02 => *as_array(buf) = [val; 02], - 04 if UP_TO >= 04 => *as_array(buf) = [val; 04], - 08 if UP_TO >= 08 => *as_array(buf) = [val; 08], - 16 if UP_TO >= 16 => *as_array(buf) = [val; 16], - 32 if UP_TO >= 32 => *as_array(buf) = [val; 32], - 64 if UP_TO >= 64 => *as_array(buf) = [val; 64], - _ => { - if WITH_DEFAULT { - buf.fill(val) - } + if N == 0 { + if WITH_DEFAULT { + buf.fill(val) } + } else { + assert!(buf.len() == N); // Meant to be optimized out. + *as_array(buf) = [val; N]; } } -pub struct CaseSetter { +pub trait CaseSetter { + fn set(&self, buf: &mut [T], val: T); + + /// # Safety + /// + /// Caller must ensure that no elements of the written range are concurrently + /// borrowed (immutably or mutably) at all during the call to `set_disjoint`. + fn set_disjoint(&self, buf: &DisjointMut, val: V) + where + T: AsMutPtr, + V: Clone + Copy; +} + +pub struct CaseSetterN { offset: usize, len: usize, } -impl CaseSetter { +impl CaseSetterN { + const fn len(&self) -> usize { + if N == 0 { + self.len + } else { + N + } + } +} + +impl CaseSetter for CaseSetterN { #[inline] - pub fn set(&self, buf: &mut [T], val: T) { - small_memset::(&mut buf[self.offset..][..self.len], val); + fn set(&self, buf: &mut [T], val: T) { + small_memset::<_, N, WITH_DEFAULT>(&mut buf[self.offset..][..self.len()], val); } /// # Safety @@ -91,16 +108,65 @@ impl CaseSetter(&self, buf: &DisjointMut, val: V) + fn set_disjoint(&self, buf: &DisjointMut, val: V) where T: AsMutPtr, V: Clone + Copy, { - let mut buf = buf.index_mut(self.offset..self.offset + self.len); - small_memset::(&mut *buf, val); + let mut buf = buf.index_mut((self.offset.., ..self.len())); + small_memset::<_, N, WITH_DEFAULT>(&mut *buf, val); } } +/// Rank-2 polymorphic closures aren't a thing in Rust yet, +/// so we need to emulate this through a generic trait with a generic method. +/// Unforunately, this means we have to write the closure sugar manually. +pub trait SetCtx { + fn call(self, case: &S, ctx: T) -> Self; +} + +/// Emulate a closure for a [`SetCtx`] `impl`. +macro_rules! set_ctx { + ( + // `||` is used instead of just `|` due to this bug: . + || + $($lifetime:lifetime,)? + $case:ident, + $ctx:ident: $T:ty, + // Note that the required trailing `,` is so `:expr` can precede `|`. + $($up_var:ident: $up_var_ty:ty$( = $up_var_val:expr)?,)* + || $body:block + ) => {{ + use $crate::src::ctx::SetCtx; + use $crate::src::ctx::CaseSetter; + + struct F$(<$lifetime>)? { + $($up_var: $up_var_ty,)* + } + + impl$(<$lifetime>)? SetCtx<$T> for F$(<$lifetime>)? { + fn call(self, $case: &S, $ctx: $T) -> Self { + let Self { + $($up_var,)* + } = self; + $body + // We destructure and re-structure `Self` so that we + // can move out of refs without using `ref`/`ref mut`, + // which I don't know how to match on in a macro. + Self { + $($up_var,)* + } + } + } + + F { + $($up_var$(: $up_var_val)?,)* + } + }}; +} + +pub(crate) use set_ctx; + /// The entrypoint to the [`CaseSet`] API. /// /// `UP_TO` and `WITH_DEFAULT` are made const generic parameters rather than have multiple `case_set*` `fn`s, @@ -117,11 +183,25 @@ impl CaseSet /// The `len` and `offset` are supplied here and /// applied to each `buf` passed to [`CaseSetter::set`] in `set_ctx`. #[inline] - pub fn one(ctx: T, len: usize, offset: usize, mut set_ctx: F) + pub fn one(ctx: T, len: usize, offset: usize, set_ctx: F) -> F where - F: FnMut(&CaseSetter, T), + F: SetCtx, { - set_ctx(&CaseSetter { offset, len }, ctx); + macro_rules! set_ctx { + ($N:literal) => { + set_ctx.call(&CaseSetterN::<$N, WITH_DEFAULT> { offset, len }, ctx) + }; + } + match len { + 01 if UP_TO >= 01 => set_ctx!(01), + 02 if UP_TO >= 02 => set_ctx!(02), + 04 if UP_TO >= 04 => set_ctx!(04), + 08 if UP_TO >= 08 => set_ctx!(08), + 16 if UP_TO >= 16 => set_ctx!(16), + 32 if UP_TO >= 32 => set_ctx!(32), + 64 if UP_TO >= 64 => set_ctx!(64), + _ => set_ctx!(0), + } } /// Perform many case sets in one call. @@ -138,10 +218,10 @@ impl CaseSet offsets: [usize; N], mut set_ctx: F, ) where - F: FnMut(&CaseSetter, T), + F: SetCtx, { for (dir, (len, offset)) in zip(dirs, zip(lens, offsets)) { - Self::one(dir, len, offset, &mut set_ctx); + set_ctx = Self::one(dir, len, offset, set_ctx); } } } diff --git a/src/decode.rs b/src/decode.rs index 3f0ae3920..d6bc0c034 100644 --- a/src/decode.rs +++ b/src/decode.rs @@ -26,6 +26,7 @@ use crate::src::cdf::rav1d_cdf_thread_init_static; use crate::src::cdf::rav1d_cdf_thread_update; use crate::src::cdf::CdfMvComponent; use crate::src::cdf::CdfThreadContext; +use crate::src::ctx::set_ctx; use crate::src::ctx::CaseSet; use crate::src::dequant_tables::dav1d_dq_tbl; use crate::src::disjoint_mut::DisjointMut; @@ -371,7 +372,8 @@ fn read_tx_tree( [(&t.l, txh), (&f.a[t.a], txw)], [t_dim.h as usize, t_dim.w as usize], [by4 as usize, bx4 as usize], - |case, (dir, val)| { + set_ctx!(||case, dir: (&BlockContext, u8), is_split: bool,|| { + let (dir, val) = dir; let tx = if is_split { TxfmSize::S4x4 } else { @@ -379,7 +381,7 @@ fn read_tx_tree( TxfmSize::from_repr(val as _).unwrap() }; case.set_disjoint(&dir.tx, tx); - }, + }), ); }; } @@ -811,9 +813,9 @@ fn read_vartx_tree( [&t.l, &f.a[t.a]], [bh4 as usize, bw4 as usize], [by4 as usize, bx4 as usize], - |case, dir| { + set_ctx!(||case, dir: &BlockContext,|| { case.set_disjoint(&dir.tx, TxfmSize::S4x4); - }, + }), ); } } else if txfm_mode != Rav1dTxfmMode::Switchable || b.skip != 0 { @@ -822,11 +824,12 @@ fn read_vartx_tree( [(&t.l, 1), (&f.a[t.a], 0)], [bh4 as usize, bw4 as usize], [by4 as usize, bx4 as usize], - |case, (dir, dir_index)| { + set_ctx!(||'a, case, dir: (&BlockContext, usize), b_dim: &'a [u8; 4],|| { + let (dir, dir_index) = dir; // TODO check unwrap is optimized out let tx = TxfmSize::from_repr(b_dim[2 + dir_index] as _).unwrap(); case.set_disjoint(&dir.tx, tx); - }, + }), ); } uvtx = dav1d_max_txfm_size_for_bs[bs as usize][f.cur.p.layout as usize]; @@ -1206,10 +1209,10 @@ fn decode_b( [&t.l, &f.a[t.a]], [bh4 as usize, bw4 as usize], [by4 as usize, bx4 as usize], - |case, dir| { + set_ctx!(||case, dir: &BlockContext, y_mode_nofilt: u8,|| { case.set_disjoint(&dir.mode, y_mode_nofilt); case.set_disjoint(&dir.intra, 1); - }, + }), ); if frame_type.is_inter_or_switch() { let ri = t.rt.r[(t.b.y as usize & 31) + 5 + bh4 as usize - 1] + t.b.x as usize; @@ -1231,9 +1234,9 @@ fn decode_b( [&t.l, &f.a[t.a]], [cbh4 as usize, cbw4 as usize], [cby4 as usize, cbx4 as usize], - |case, dir| { + set_ctx!(||'a, case, dir: &BlockContext, intra: &'a Av1BlockIntra,|| { case.set_disjoint(&dir.uvmode, intra.uv_mode); - }, + }), ); } } @@ -1281,11 +1284,11 @@ fn decode_b( [&t.l, &f.a[t.a]], [bh4 as usize, bw4 as usize], [by4 as usize, bx4 as usize], - |case, dir| { + set_ctx!(||'a, case, dir: &BlockContext, filter: &'a [Rav1dFilterMode; 2],|| { case.set_disjoint(&dir.filter[0], filter[0].into()); case.set_disjoint(&dir.filter[1], filter[1].into()); case.set_disjoint(&dir.intra, 0); - }, + }), ); if frame_type.is_inter_or_switch() { @@ -1310,9 +1313,9 @@ fn decode_b( [&t.l, &f.a[t.a]], [cbh4 as usize, cbw4 as usize], [cby4 as usize, cbx4 as usize], - |case, dir| { + set_ctx!(||case, dir: &BlockContext,|| { case.set_disjoint(&dir.uvmode, DC_PRED); - }, + }), ); } } @@ -1973,7 +1976,17 @@ fn decode_b( [(&t.l, t_dim.lh, 1), (&f.a[t.a], t_dim.lw, 0)], [bh4 as usize, bw4 as usize], [by4 as usize, bx4 as usize], - |case, (dir, lw_lh, dir_index)| { + set_ctx!(||'a, case, dir: (&BlockContext, u8, usize), + y_mode_nofilt: u8, + pal_sz: [u8; 2], + seg_pred: bool, + b: &'a Av1Block, + // Only real closures can do partial borrows. + pal_sz_uv: &'a mut [[u8; 32]; 2] = &mut t.pal_sz_uv, + has_chroma: bool, + is_inter_or_switch: bool, + || { + let (dir, lw_lh, dir_index) = dir; case.set_disjoint(&dir.tx_intra, lw_lh as i8); // TODO check unwrap is optimized out case.set_disjoint(&dir.tx, TxfmSize::from_repr(lw_lh as _).unwrap()); @@ -1985,7 +1998,7 @@ fn decode_b( case.set_disjoint(&dir.skip, b.skip); // see aomedia bug 2183 for why we use luma coordinates here case.set( - &mut t.pal_sz_uv[dir_index], + &mut pal_sz_uv[dir_index], if has_chroma { pal_sz[1] } else { 0 }, ); if is_inter_or_switch { @@ -1995,7 +2008,7 @@ fn decode_b( case.set_disjoint(&dir.filter[0], Rav1dFilterMode::N_SWITCHABLE_FILTERS); case.set_disjoint(&dir.filter[1], Rav1dFilterMode::N_SWITCHABLE_FILTERS); } - }, + }), ); if pal_sz[0] != 0 { (bd_fn.copy_pal_block_y)(t, f, bx4 as usize, by4 as usize, bw4 as usize, bh4 as usize); @@ -2005,9 +2018,9 @@ fn decode_b( [&t.l, &f.a[t.a]], [cbh4 as usize, cbw4 as usize], [cby4 as usize, cbx4 as usize], - |case, dir| { + set_ctx!(||case, dir: &BlockContext, uv_mode: u8,|| { case.set_disjoint(&dir.uvmode, uv_mode); - }, + }), ); if pal_sz[1] != 0 { (bd_fn.copy_pal_block_uv)( @@ -2182,26 +2195,33 @@ fn decode_b( [(&t.l, 1), (&f.a[t.a], 0)], [bh4 as usize, bw4 as usize], [by4 as usize, bx4 as usize], - |case, (dir, dir_index)| { + set_ctx!(||'a, case, dir: (&BlockContext, usize), + b_dim: &'a [u8; 4], + seg_pred: bool, + // Only real closures can do partial borrows. + pal_sz_uv: &'a mut [[u8; 32]; 2] = &mut t.pal_sz_uv, + b: &'a Av1Block, + || { + let (dir, dir_index) = dir; case.set_disjoint(&dir.tx_intra, b_dim[2 + dir_index] as i8); case.set_disjoint(&dir.mode, DC_PRED); case.set_disjoint(&dir.pal_sz, 0); // see aomedia bug 2183 for why this is outside `if has_chroma {}` - case.set(&mut t.pal_sz_uv[dir_index], 0); + case.set(&mut pal_sz_uv[dir_index], 0); case.set_disjoint(&dir.seg_pred, seg_pred.into()); case.set_disjoint(&dir.skip_mode, 0); case.set_disjoint(&dir.intra, 0); case.set_disjoint(&dir.skip, b.skip); - }, + }), ); if has_chroma { CaseSet::<32, false>::many( [&t.l, &f.a[t.a]], [cbh4 as usize, cbw4 as usize], [cby4 as usize, cbx4 as usize], - |case, dir| { + set_ctx!(||case, dir: &BlockContext,|| { case.set_disjoint(&dir.uvmode, DC_PRED); - }, + }), ); } } else { @@ -3135,14 +3155,25 @@ fn decode_b( [(&t.l, 1), (&f.a[t.a], 0)], [bh4 as usize, bw4 as usize], [by4 as usize, bx4 as usize], - |case, (dir, dir_index)| { + set_ctx!(||'a, case, dir: (&BlockContext, usize), + seg_pred: bool, + b: &'a Av1Block, + // Only real closures can do partial borrows. + pal_sz_uv: &'a mut [[u8; 32]; 2] = &mut t.pal_sz_uv, + b_dim: &'a [u8; 4], + comp_type: Option, + filter: [Rav1dFilterMode; 2], + inter_mode: u8, + r#ref: [i8; 2], + || { + let (dir, dir_index) = dir; case.set_disjoint(&dir.seg_pred, seg_pred.into()); case.set_disjoint(&dir.skip_mode, b.skip_mode); case.set_disjoint(&dir.intra, 0); case.set_disjoint(&dir.skip, b.skip); case.set_disjoint(&dir.pal_sz, 0); // see aomedia bug 2183 for why this is outside if (has_chroma) - case.set(&mut t.pal_sz_uv[dir_index], 0); + case.set(&mut pal_sz_uv[dir_index], 0); case.set_disjoint(&dir.tx_intra, b_dim[2 + dir_index] as i8); case.set_disjoint(&dir.comp_type, comp_type); case.set_disjoint(&dir.filter[0], filter[0]); @@ -3150,7 +3181,7 @@ fn decode_b( case.set_disjoint(&dir.mode, inter_mode); case.set_disjoint(&dir.r#ref[0], r#ref[0]); case.set_disjoint(&dir.r#ref[1], r#ref[1]); - }, + }), ); if has_chroma { @@ -3158,9 +3189,9 @@ fn decode_b( [&t.l, &f.a[t.a]], [cbh4 as usize, cbw4 as usize], [cby4 as usize, cbx4 as usize], - |case, dir| { + set_ctx!(||case, dir: &BlockContext,|| { case.set_disjoint(&dir.uvmode, DC_PRED); - }, + }), ); } } @@ -3173,12 +3204,24 @@ fn decode_b( let b4_stride = usize::try_from(f.b4_stride).unwrap(); let cur_segmap = &f.cur_segmap.as_ref().unwrap().inner; let offset = by * b4_stride + bx; - CaseSet::<32, false>::one((), bw4, 0, |case, ()| { - for i in 0..bh4 { - let i = offset + i * b4_stride; - case.set(&mut cur_segmap.index_mut((i.., ..bw4)), b.seg_id); - } - }); + CaseSet::<32, false>::one( + (), + bw4, + 0, + set_ctx!(||'a, case, _dir: (), + bw4: usize, + bh4: usize, + offset: usize, + b4_stride: usize, + cur_segmap: &'a DisjointMutSlice, + b: &'a Av1Block, + || { + for i in 0..bh4 { + let i = offset + i * b4_stride; + case.set(&mut cur_segmap.index_mut((i.., ..bw4)), b.seg_id); + } + }), + ); } if b.skip == 0 { let mask = !0u32 >> 32 - bw4 << (bx4 & 15); @@ -3795,12 +3838,16 @@ fn decode_sb( [(&f.a[t.a], 0), (&t.l, 1)], [hsz as usize; 2], [bx8 as usize, by8 as usize], - |case, (dir, dir_index)| { + set_ctx!(||case, dir: (&BlockContext, usize), + bl: BlockLevel, + bp: BlockPartition, + || { + let (dir, dir_index) = dir; case.set_disjoint( &dir.partition, dav1d_al_part_ctx[dir_index][bl as usize][bp as usize], ); - }, + }), ); } diff --git a/src/lf_mask.rs b/src/lf_mask.rs index 3310f34fc..fd36eee95 100644 --- a/src/lf_mask.rs +++ b/src/lf_mask.rs @@ -6,6 +6,7 @@ use crate::include::dav1d::headers::Rav1dPixelLayout; use crate::include::dav1d::headers::Rav1dRestorationType; use crate::src::align::Align16; use crate::src::align::ArrayDefault; +use crate::src::ctx::set_ctx; use crate::src::ctx::CaseSet; use crate::src::disjoint_mut::DisjointMut; use crate::src::internal::Bxy; @@ -14,6 +15,7 @@ use crate::src::levels::SegmentId; use crate::src::levels::TxfmSize; use crate::src::relaxed_atomic::RelaxedAtomic; use crate::src::tables::dav1d_txfm_dimensions; +use crate::src::tables::TxfmInfo; use libc::ptrdiff_t; use parking_lot::RwLock; use std::cmp; @@ -128,16 +130,37 @@ fn decomp_tx( let lw = cmp::min(2, t_dim.lw); let lh = cmp::min(2, t_dim.lh); - CaseSet::<16, false>::one((), t_dim.w as usize, x0, |case, ()| { - for y in 0..t_dim.h as usize { - case.set(&mut txa[0][0][y0 + y], lw); - case.set(&mut txa[1][0][y0 + y], lh); - txa[0][1][y0 + y][x0] = t_dim.w; - } - }); - CaseSet::<16, false>::one((), t_dim.w as usize, x0, |case, ()| { - case.set(&mut txa[1][1][y0], t_dim.h); - }); + CaseSet::<16, false>::one( + (), + t_dim.w as usize, + x0, + set_ctx!(||'a, case, _dir: (), + t_dim: &'a TxfmInfo, + txa: &'a mut [[[[u8; 32]; 32]; 2]; 2], + y0: usize, + x0: usize, + lw: u8, + lh: u8, + || { + for y in 0..t_dim.h as usize { + case.set(&mut txa[0][0][y0 + y], lw); + case.set(&mut txa[1][0][y0 + y], lh); + txa[0][1][y0 + y][x0] = t_dim.w; + } + }), + ); + CaseSet::<16, false>::one( + (), + t_dim.w as usize, + x0, + set_ctx!(||'a, case, _dir: (), + t_dim: &'a TxfmInfo, + txa: &'a mut [[[[u8; 32]; 32]; 2]; 2], + y0: usize, + || { + case.set(&mut txa[1][1][y0], t_dim.h); + }), + ); }; } @@ -297,9 +320,10 @@ fn mask_edges_intra( [(a, thl4c), (l, twl4c)], [w4 as usize, h4 as usize], [0, 0], - |case, (dir, tl4c)| { + set_ctx!(||case, dir: (&mut [u8], u8),|| { + let (dir, tl4c) = dir; case.set(dir, tl4c); - }, + }), ); } @@ -380,9 +404,10 @@ fn mask_edges_chroma( [(a, thl4c), (l, twl4c)], [cw4 as usize, ch4 as usize], [0, 0], - |case, (dir, tl4c)| { + set_ctx!(||case, dir: (&mut [u8], u8),|| { + let (dir, tl4c) = dir; case.set(dir, tl4c); - }, + }), ); } diff --git a/src/recon.rs b/src/recon.rs index 274a50841..8e38aaed1 100644 --- a/src/recon.rs +++ b/src/recon.rs @@ -20,8 +20,10 @@ use crate::include::dav1d::picture::Rav1dPictureDataComponent; use crate::include::dav1d::picture::Rav1dPictureDataComponentOffset; use crate::src::assume::assume; use crate::src::cdef_apply::rav1d_cdef_brow; +use crate::src::ctx::set_ctx; use crate::src::ctx::CaseSet; use crate::src::env::get_uv_inter_txtp; +use crate::src::env::BlockContext; use crate::src::in_range::InRange; use crate::src::internal::Bxy; use crate::src::internal::Cf; @@ -1396,23 +1398,32 @@ fn read_coef_tree( ); } CaseSet::<16, true>::many( - [&t.l.lcoef, &f.a[t.a].lcoef], + [&t.l, &f.a[t.a]], [ cmp::min(txh as c_int, f.bh - t.b.y) as usize, cmp::min(txw as c_int, f.bw - t.b.x) as usize, ], [by4, bx4], - |case, dir| { - case.set_disjoint(dir, cf_ctx); - }, + set_ctx!(||case, dir: &BlockContext, cf_ctx: u8,|| { + case.set_disjoint(&dir.lcoef, cf_ctx); + }), ); let txtp_map = &mut t.scratch.inter_intra_mut().ac_txtp_map.txtp_map_mut()[by4 * 32 + bx4..]; - CaseSet::<16, false>::one((), txw as usize, 0, |case, ()| { - for txtp_map in txtp_map.chunks_mut(32).take(txh as usize) { - case.set(txtp_map, txtp); - } - }); + CaseSet::<16, false>::one( + (), + txw as usize, + 0, + set_ctx!(||'a, case, _dir: (), + txtp_map: &'a mut [u8], + txh: u8, + txtp: u8, + || { + for txtp_map in txtp_map.chunks_mut(32).take(txh as usize) { + case.set(txtp_map, txtp); + } + }), + ); if t.frame_thread.pass == 1 { let cbi_idx = ts.frame_thread[1].cbi_idx.get_update(|i| i + 1); f.frame_thread.cbi[cbi_idx as usize].set(CodedBlockInfo::new(eob as i16, txtp)); @@ -1479,20 +1490,20 @@ pub(crate) fn rav1d_read_coef_blocks( [&t.l, &f.a[t.a]], [bh4 as usize, bw4 as usize], [by4, bx4], - |case, dir| { + set_ctx!(||case, dir: &BlockContext,|| { case.set_disjoint(&dir.lcoef, 0x40); - }, + }), ); if has_chroma { CaseSet::<32, false>::many( [&t.l, &f.a[t.a]], [cbh4 as usize, cbw4 as usize], [cby4, cbx4], - |case, dir| { + set_ctx!(||case, dir: &BlockContext,|| { for ccoef in &dir.ccoef { case.set_disjoint(ccoef, 0x40) } - }, + }), ); } return; @@ -1582,15 +1593,15 @@ pub(crate) fn rav1d_read_coef_blocks( f.frame_thread.cbi[cbi_idx as usize] .set(CodedBlockInfo::new(eob as i16, txtp)); CaseSet::<16, true>::many( - [&t.l.lcoef, &f.a[t.a].lcoef], + [&t.l, &f.a[t.a]], [ cmp::min(t_dim.h as i32, f.bh - t.b.y) as usize, cmp::min(t_dim.w as i32, f.bw - t.b.x) as usize, ], [by4 + y as usize, bx4 + x as usize], - |case, dir| { - case.set_disjoint(dir, cf_ctx); - }, + set_ctx!(||case, dir: &BlockContext, cf_ctx: u8,|| { + case.set_disjoint(&dir.lcoef, cf_ctx); + }), ); } } @@ -1631,10 +1642,8 @@ pub(crate) fn rav1d_read_coef_blocks( }; let a_start = cbx4 + x as usize; let a_len = uv_t_dim.w as usize; - let a_ccoef = &f.a[t.a].ccoef[pl]; let l_start = cby4 + y as usize; let l_len = uv_t_dim.h as usize; - let l_ccoef = &t.l.ccoef[pl]; let cf_idx = ts.frame_thread[1].cf.get(); let eob = decode_coefs::( f, @@ -1643,8 +1652,8 @@ pub(crate) fn rav1d_read_coef_blocks( debug_block_info!(f, t.b), &mut t.scratch, &mut t.cf, - &mut a_ccoef.index_mut((a_start.., ..a_len)), - &mut l_ccoef.index_mut((l_start.., ..l_len)), + &mut f.a[t.a].ccoef[pl].index_mut((a_start.., ..a_len)), + &mut t.l.ccoef[pl].index_mut((l_start.., ..l_len)), b.uvtx, bs, b, @@ -1666,7 +1675,7 @@ pub(crate) fn rav1d_read_coef_blocks( .cf .set(cf_idx + uv_t_dim.w as u32 * uv_t_dim.h as u32 * 16); CaseSet::<16, true>::many( - [l_ccoef, a_ccoef], + [&t.l, &f.a[t.a]], [ cmp::min( uv_t_dim.h as i32, @@ -1678,9 +1687,9 @@ pub(crate) fn rav1d_read_coef_blocks( ) as usize, ], [cby4 + y as usize, cbx4 as usize + x as usize], - |case, dir| { - case.set_disjoint(dir, cf_ctx); - }, + set_ctx!(||case, dir: &BlockContext, pl: usize, cf_ctx: u8,|| { + case.set_disjoint(&dir.ccoef[pl], cf_ctx); + }), ); x += uv_t_dim.w; t.b.x += (uv_t_dim.w as c_int) << ss_hor; @@ -2330,9 +2339,9 @@ pub(crate) fn rav1d_recon_b_intra( cmp::min(t_dim.w as i32, f.bw - t.b.x) as usize, ], [(by4 + y) as usize, (bx4 + x) as usize], - |case, dir| { + set_ctx!(||case, dir: &BlockContext, cf_ctx: u8,|| { case.set_disjoint(&dir.lcoef, cf_ctx); - }, + }), ); } if eob >= 0 { @@ -2361,9 +2370,9 @@ pub(crate) fn rav1d_recon_b_intra( [&t.l, &f.a[t.a]], [t_dim.h as usize, t_dim.w as usize], [(by4 + y) as usize, (bx4 + x) as usize], - |case, dir| { + set_ctx!(||case, dir: &BlockContext,|| { case.set_disjoint(&dir.lcoef, 0x40); - }, + }), ); } y_dst += 4 * t_dim.w as usize; @@ -2665,9 +2674,7 @@ pub(crate) fn rav1d_recon_b_intra( } else { let mut cf_ctx: u8 = 0; let a_start = (cbx4 + x) as usize; - let a_ccoef = &f.a[t.a].ccoef[pl]; let l_start = (cby4 + y) as usize; - let l_ccoef = &t.l.ccoef[pl]; eob = decode_coefs::( f, t.ts, @@ -2675,8 +2682,10 @@ pub(crate) fn rav1d_recon_b_intra( debug_block_info!(f, t.b), &mut t.scratch, &mut t.cf, - &mut a_ccoef.index_mut(a_start..a_start + uv_t_dim.w as usize), - &mut l_ccoef.index_mut(l_start..l_start + uv_t_dim.h as usize), + &mut f.a[t.a].ccoef[pl] + .index_mut(a_start..a_start + uv_t_dim.w as usize), + &mut t.l.ccoef[pl] + .index_mut(l_start..l_start + uv_t_dim.h as usize), b.uvtx, bs, b, @@ -2699,7 +2708,7 @@ pub(crate) fn rav1d_recon_b_intra( ); } CaseSet::<16, true>::many( - [l_ccoef, a_ccoef], + [&t.l, &f.a[t.a]], [ cmp::min(uv_t_dim.h as i32, f.bh - t.b.y + ss_ver >> ss_ver) as usize, @@ -2707,9 +2716,9 @@ pub(crate) fn rav1d_recon_b_intra( as usize, ], [(cby4 + y) as usize, (cbx4 + x) as usize], - |case, dir| { - case.set_disjoint(dir, cf_ctx); - }, + set_ctx!(||case, dir: &BlockContext, pl: usize, cf_ctx: u8,|| { + case.set_disjoint(&dir.ccoef[pl], cf_ctx); + }), ); } if eob >= 0 { @@ -2738,9 +2747,9 @@ pub(crate) fn rav1d_recon_b_intra( [&t.l, &f.a[t.a]], [uv_t_dim.h as usize, uv_t_dim.w as usize], [(cby4 + y) as usize, (cbx4 + x) as usize], - |case, dir| { + set_ctx!(||case, dir: &BlockContext, pl: usize,|| { case.set_disjoint(&dir.ccoef[pl], 0x40); - }, + }), ); } uv_dst += uv_t_dim.w as usize * 4; @@ -3449,20 +3458,20 @@ pub(crate) fn rav1d_recon_b_inter( [&t.l, &f.a[t.a]], [bh4 as usize, bw4 as usize], [by4 as usize, bx4 as usize], - |case, dir| { + set_ctx!(||case, dir: &BlockContext,|| { case.set_disjoint(&dir.lcoef, 0x40); - }, + }), ); if has_chroma { CaseSet::<32, false>::many( [&t.l, &f.a[t.a]], [cbh4 as usize, cbw4 as usize], [cby4 as usize, cbx4 as usize], - |case, dir| { + set_ctx!(||case, dir: &BlockContext,|| { for ccoef in &dir.ccoef { case.set_disjoint(ccoef, 0x40); } - }, + }), ); } return Ok(()); @@ -3547,9 +3556,7 @@ pub(crate) fn rav1d_recon_b_inter( let mut cf_ctx = 0; txtp = t.scratch.inter_intra().ac_txtp_map.txtp_map() [((by4 + (y << ss_ver)) * 32 + bx4 + (x << ss_hor)) as usize]; - let a_ccoef = &f.a[t.a].ccoef[pl]; let a_start = (cbx4 + x) as usize; - let l_ccoef = &t.l.ccoef[pl]; let l_start = (cby4 + y) as usize; eob = decode_coefs::( f, @@ -3558,8 +3565,9 @@ pub(crate) fn rav1d_recon_b_inter( debug_block_info!(f, t.b), &mut t.scratch, &mut t.cf, - &mut a_ccoef.index_mut((a_start.., ..uvtx.w as usize)), - &mut l_ccoef.index_mut((l_start.., ..uvtx.h as usize)), + &mut f.a[t.a].ccoef[pl] + .index_mut((a_start.., ..uvtx.w as usize)), + &mut t.l.ccoef[pl].index_mut((l_start.., ..uvtx.h as usize)), b.uvtx, bs, b, @@ -3580,7 +3588,7 @@ pub(crate) fn rav1d_recon_b_inter( ); } CaseSet::<16, true>::many( - [l_ccoef, a_ccoef], + [&t.l, &f.a[t.a]], [ cmp::min(uvtx.h as i32, f.bh - t.b.y + ss_ver >> ss_ver) as usize, @@ -3588,9 +3596,9 @@ pub(crate) fn rav1d_recon_b_inter( as usize, ], [(cby4 + y) as usize, (cbx4 + x) as usize], - |case, dir| { - case.set_disjoint(dir, cf_ctx); - }, + set_ctx!(||case, dir: &BlockContext, pl: usize, cf_ctx: u8,|| { + case.set_disjoint(&dir.ccoef[pl], cf_ctx); + }), ); } if eob >= 0 {