Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Well-defined lazy initialization for get_intra_edges #3277

Merged
merged 2 commits into from
Oct 27, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 5 additions & 10 deletions benches/predict.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,25 +12,19 @@ use rand::{Rng, SeedableRng};
use rand_chacha::ChaChaRng;
use rav1e::bench::cpu_features::CpuFeatureLevel;
use rav1e::bench::frame::*;
use rav1e::bench::partition::BlockSize;
use rav1e::bench::partition::{BlockSize, IntraEdge};
use rav1e::bench::predict::*;
use rav1e::bench::transform::TxSize;
use rav1e::bench::util::*;

pub const BLOCK_SIZE: BlockSize = BlockSize::BLOCK_32X32;

pub fn generate_block<T: Pixel>(
rng: &mut ChaChaRng, edge_buf: &mut Aligned<[T; 257]>,
) -> (Plane<T>, Vec<i16>) {
pub fn generate_block<T: Pixel>(rng: &mut ChaChaRng) -> (Plane<T>, Vec<i16>) {
let block = Plane::from_slice(
&vec![T::cast_from(0); BLOCK_SIZE.width() * BLOCK_SIZE.height()],
BLOCK_SIZE.width(),
);
let ac: Vec<i16> = (0..(32 * 32)).map(|_| rng.gen()).collect();
for v in edge_buf.data.iter_mut() {
*v = T::cast_from(rng.gen::<u8>());
}

(block, ac)
}

Expand Down Expand Up @@ -132,8 +126,9 @@ pub fn intra_bench<T: Pixel>(
b: &mut Bencher, mode: PredictionMode, variant: PredictionVariant,
) {
let mut rng = ChaChaRng::from_seed([0; 32]);
let mut edge_buf = unsafe { Aligned::uninitialized() };
let (mut block, ac) = generate_block::<T>(&mut rng, &mut edge_buf);
let edge_buf = Aligned::from_fn(|_| T::cast_from(rng.gen::<u8>()));
let edge_buf = IntraEdge::mock(&edge_buf);
let (mut block, ac) = generate_block::<T>(&mut rng);
let cpu = CpuFeatureLevel::default();
let bitdepth = match T::type_enum() {
PixelType::U8 => 8,
Expand Down
3 changes: 3 additions & 0 deletions src/api/lookahead.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ use crate::partition::{get_intra_edges, BlockSize};
use crate::predict::{IntraParam, PredictionMode};
use crate::tiling::{Area, PlaneRegion, TileRect};
use crate::transform::TxSize;
use crate::util::Aligned;
use crate::Pixel;
use rayon::iter::*;
use rust_hawktracer::*;
Expand Down Expand Up @@ -54,7 +55,9 @@ pub(crate) fn estimate_intra_costs<T: Pixel>(
});

// TODO: other intra prediction modes.
let mut edge_buf = Aligned::uninit_array();
let edge_buf = get_intra_edges(
&mut edge_buf,
&plane.as_region(),
TileBlockOffset(BlockOffset { x, y }),
0,
Expand Down
15 changes: 6 additions & 9 deletions src/asm/aarch64/predict.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@

use crate::context::MAX_TX_SIZE;
use crate::cpu_features::CpuFeatureLevel;
use crate::partition::BlockSize;
use crate::partition::{BlockSize, IntraEdge};
use crate::predict::rust::{
dr_intra_derivative, select_ief_strength, select_ief_upsample,
};
Expand All @@ -18,7 +18,6 @@ use crate::predict::{
};
use crate::tiling::{PlaneRegion, PlaneRegionMut};
use crate::transform::TxSize;
use crate::util::Aligned;
use crate::{Pixel, PixelType};
use libc;
use libc::{c_int, ptrdiff_t};
Expand Down Expand Up @@ -487,7 +486,7 @@ pub fn dispatch_predict_intra<T: Pixel>(
mode: PredictionMode, variant: PredictionVariant,
dst: &mut PlaneRegionMut<'_, T>, tx_size: TxSize, bit_depth: usize,
ac: &[i16], angle: isize, ief_params: Option<IntraEdgeFilterParameters>,
edge_buf: &Aligned<[T; 4 * MAX_TX_SIZE + 1]>, cpu: CpuFeatureLevel,
edge_buf: &IntraEdge<T>, cpu: CpuFeatureLevel,
) {
let call_rust = |dst: &mut PlaneRegionMut<'_, T>| {
rust::dispatch_predict_intra(
Expand All @@ -504,10 +503,8 @@ pub fn dispatch_predict_intra<T: Pixel>(
let dst_ptr = dst.data_ptr_mut() as *mut _;
let dst_u16 = dst.data_ptr_mut() as *mut u16;
let stride = T::to_asm_stride(dst.plane_cfg.stride) as libc::ptrdiff_t;
let edge_ptr =
edge_buf.data.as_ptr().offset(2 * MAX_TX_SIZE as isize) as *const _;
let edge_u16 =
edge_buf.data.as_ptr().offset(2 * MAX_TX_SIZE as isize) as *const u16;
let edge_ptr = edge_buf.top_left_ptr() as *const _;
let edge_u16 = edge_buf.top_left_ptr() as *const u16;
let w = tx_size.width() as libc::c_int;
let h = tx_size.height() as libc::c_int;
let angle = angle as libc::c_int;
Expand Down Expand Up @@ -600,7 +597,7 @@ pub fn dispatch_predict_intra<T: Pixel>(
return ipred_z2(
dst.data_ptr_mut(),
stride,
edge_buf.data.as_ptr().add(2 * MAX_TX_SIZE),
edge_buf.top_left_ptr(),
angle as isize,
w,
h,
Expand All @@ -614,7 +611,7 @@ pub fn dispatch_predict_intra<T: Pixel>(
(if angle < 90 { ipred_z1 } else { ipred_z3 })(
dst.data_ptr_mut(),
stride,
edge_buf.data.as_ptr().add(2 * MAX_TX_SIZE),
edge_buf.top_left_ptr(),
angle as isize,
w,
h,
Expand Down
5 changes: 3 additions & 2 deletions src/asm/shared/predict.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ mod test {
use crate::context::MAX_TX_SIZE;
use crate::cpu_features::CpuFeatureLevel;
use crate::frame::{AsRegion, Plane};
use crate::partition::BlockSize;
use crate::partition::{BlockSize, IntraEdge};
use crate::predict::dispatch_predict_intra;
use crate::predict::pred_cfl_ac;
use crate::predict::rust;
Expand All @@ -41,9 +41,10 @@ mod test {
fn pred_matches_inner<T: Pixel>(cpu: CpuFeatureLevel, bit_depth: usize) {
let tx_size = TxSize::TX_4X4;
let ac: Aligned<[i16; 32 * 32]> = Aligned::from_fn(|i| i as i16 - 16 * 32);
let edge_buf: Aligned<[T; 4 * MAX_TX_SIZE + 1]> = Aligned::from_fn(|i| {
let edge_buf = Aligned::from_fn(|i| {
T::cast_from(((i ^ 1) + 32).saturating_sub(2 * MAX_TX_SIZE))
});
let edge_buf = IntraEdge::mock(&edge_buf);

let ief_params_all = [
None,
Expand Down
20 changes: 7 additions & 13 deletions src/asm/shared/transform/inverse.rs
Original file line number Diff line number Diff line change
Expand Up @@ -27,9 +27,7 @@ pub fn call_inverse_func<T: Pixel>(
// Only use at most 32 columns and 32 rows of input coefficients.
let input: &[T::Coeff] = &input[..width.min(32) * height.min(32)];

// SAFETY: We write to the array below before reading from it.
let mut copied: Aligned<[MaybeUninit<T::Coeff>; 32 * 32]> =
unsafe { Aligned::uninitialized() };
let mut copied = Aligned::<[MaybeUninit<T::Coeff>; 32 * 32]>::uninit_array();

// Convert input to 16-bits.
// TODO: Remove by changing inverse assembly to not overwrite its input
Expand Down Expand Up @@ -57,9 +55,7 @@ pub fn call_inverse_hbd_func<T: Pixel>(
// Only use at most 32 columns and 32 rows of input coefficients.
let input: &[T::Coeff] = &input[..width.min(32) * height.min(32)];

// SAFETY: We write to the array below before reading from it.
let mut copied: Aligned<[MaybeUninit<T::Coeff>; 32 * 32]> =
unsafe { Aligned::uninitialized() };
let mut copied = Aligned::<[MaybeUninit<T::Coeff>; 32 * 32]>::uninit_array();

// Convert input to 16-bits.
// TODO: Remove by changing inverse assembly to not overwrite its input
Expand Down Expand Up @@ -152,13 +148,11 @@ pub mod test {
&[T::zero(); 64 * 64][..tx_size.area()],
tx_size.width(),
);
let mut res_storage: Aligned<[MaybeUninit<i16>; 64 * 64]> =
unsafe { Aligned::uninitialized() };
let res = &mut res_storage.data[..tx_size.area()];
// SAFETY: We write to the array below before reading from it.
let mut freq_storage: Aligned<[MaybeUninit<T::Coeff>; 64 * 64]> =
unsafe { Aligned::uninitialized() };
let freq = &mut freq_storage.data[..tx_size.area()];
let mut res = Aligned::<[MaybeUninit<i16>; 64 * 64]>::uninit_array();
let res = &mut res.data[..tx_size.area()];
let mut freq =
Aligned::<[MaybeUninit<T::Coeff>; 64 * 64]>::uninit_array();
let freq = &mut freq.data[..tx_size.area()];
for ((r, s), d) in
res.iter_mut().zip(src.iter_mut()).zip(dst.data.iter_mut())
{
Expand Down
12 changes: 4 additions & 8 deletions src/asm/x86/predict.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,15 +7,13 @@
// Media Patent License 1.0 was not distributed with this source code in the
// PATENTS file, you can obtain it at www.aomedia.org/license/patent.

use crate::context::MAX_TX_SIZE;
use crate::cpu_features::CpuFeatureLevel;
use crate::partition::BlockSize;
use crate::partition::{BlockSize, IntraEdge};
use crate::predict::{
rust, IntraEdgeFilterParameters, PredictionMode, PredictionVariant,
};
use crate::tiling::{PlaneRegion, PlaneRegionMut};
use crate::transform::TxSize;
use crate::util::Aligned;
use crate::Pixel;
use std::mem::MaybeUninit;
use v_frame::pixel::PixelType;
Expand Down Expand Up @@ -242,7 +240,7 @@ pub fn dispatch_predict_intra<T: Pixel>(
mode: PredictionMode, variant: PredictionVariant,
dst: &mut PlaneRegionMut<'_, T>, tx_size: TxSize, bit_depth: usize,
ac: &[i16], angle: isize, ief_params: Option<IntraEdgeFilterParameters>,
edge_buf: &Aligned<[T; 4 * MAX_TX_SIZE + 1]>, cpu: CpuFeatureLevel,
edge_buf: &IntraEdge<T>, cpu: CpuFeatureLevel,
) {
let call_rust = |dst: &mut PlaneRegionMut<'_, T>| {
rust::dispatch_predict_intra(
Expand All @@ -261,8 +259,7 @@ pub fn dispatch_predict_intra<T: Pixel>(
match T::type_enum() {
PixelType::U8 => {
let dst_ptr = dst.data_ptr_mut() as *mut _;
let edge_ptr =
edge_buf.data.as_ptr().offset(2 * MAX_TX_SIZE as isize) as *const _;
let edge_ptr = edge_buf.top_left_ptr() as *const _;
if cpu >= CpuFeatureLevel::AVX512ICL {
match mode {
PredictionMode::DC_PRED => {
Expand Down Expand Up @@ -555,8 +552,7 @@ pub fn dispatch_predict_intra<T: Pixel>(
}
PixelType::U16 => {
let dst_ptr = dst.data_ptr_mut() as *mut _;
let edge_ptr =
edge_buf.data.as_ptr().offset(2 * MAX_TX_SIZE as isize) as *const _;
let edge_ptr = edge_buf.top_left_ptr() as *const _;
let bd_max = (1 << bit_depth) - 1;
if cpu >= CpuFeatureLevel::AVX512ICL {
match mode {
Expand Down
5 changes: 2 additions & 3 deletions src/context/block_unit.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1919,9 +1919,8 @@ impl<'a> ContextWriter<'a> {
tx_size: TxSize, tx_class: TxClass, txs_ctx: usize, plane_type: usize,
w: &mut W,
) {
// SAFETY: We write to the array below before reading from it.
let mut coeff_contexts: Aligned<[MaybeUninit<i8>; MAX_CODED_TX_SQUARE]> =
unsafe { Aligned::uninitialized() };
let mut coeff_contexts =
Aligned::<[MaybeUninit<i8>; MAX_CODED_TX_SQUARE]>::uninit_array();

// get_nz_map_contexts sets coeff_contexts contiguously as a parallel array for scan, not in scan order
let coeff_contexts = self.get_nz_map_contexts(
Expand Down
28 changes: 13 additions & 15 deletions src/encoder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1475,7 +1475,9 @@ pub fn encode_tx_block<T: Pixel, W: Writer>(

if mode.is_intra() {
let bit_depth = fi.sequence.bit_depth;
let mut edge_buf = Aligned::uninit_array();
let edge_buf = get_intra_edges(
&mut edge_buf,
&rec.as_const(),
tile_partition_bo,
bx,
Expand Down Expand Up @@ -1507,21 +1509,19 @@ pub fn encode_tx_block<T: Pixel, W: Writer>(
}

let coded_tx_area = av1_get_coded_tx_size(tx_size).area();
let mut residual_storage: Aligned<[MaybeUninit<i16>; 64 * 64]> =
unsafe { Aligned::uninitialized() };
let mut coeffs_storage: Aligned<[MaybeUninit<T::Coeff>; 64 * 64]> =
unsafe { Aligned::uninitialized() };
let mut qcoeffs_storage: Aligned<[MaybeUninit<T::Coeff>; 32 * 32]> =
unsafe { Aligned::uninitialized() };
let mut rcoeffs_storage: Aligned<[MaybeUninit<T::Coeff>; 32 * 32]> =
unsafe { Aligned::uninitialized() };
let residual = &mut residual_storage.data[..tx_size.area()];
let coeffs = &mut coeffs_storage.data[..tx_size.area()];
let mut residual = Aligned::<[MaybeUninit<i16>; 64 * 64]>::uninit_array();
let mut coeffs = Aligned::<[MaybeUninit<T::Coeff>; 64 * 64]>::uninit_array();
let mut qcoeffs =
Aligned::<[MaybeUninit<T::Coeff>; 32 * 32]>::uninit_array();
let mut rcoeffs =
Aligned::<[MaybeUninit<T::Coeff>; 32 * 32]>::uninit_array();
let residual = &mut residual.data[..tx_size.area()];
let coeffs = &mut coeffs.data[..tx_size.area()];
let qcoeffs = init_slice_repeat_mut(
&mut qcoeffs_storage.data[..coded_tx_area],
&mut qcoeffs.data[..coded_tx_area],
T::Coeff::cast_from(0),
);
let rcoeffs = &mut rcoeffs_storage.data[..coded_tx_area];
let rcoeffs = &mut rcoeffs.data[..coded_tx_area];

let (visible_tx_w, visible_tx_h) = clip_visible_bsize(
(fi.width + xdec) >> xdec,
Expand Down Expand Up @@ -2260,9 +2260,7 @@ pub fn write_tx_blocks<T: Pixel, W: Writer>(
}

let PlaneConfig { xdec, ydec, .. } = ts.input.planes[1].cfg;
// SAFETY: We write to the array below before reading from it.
let mut ac: Aligned<[MaybeUninit<i16>; 32 * 32]> =
unsafe { Aligned::uninitialized() };
let mut ac = Aligned::<[MaybeUninit<i16>; 32 * 32]>::uninit_array();
let mut partition_has_coeff: bool = false;
let mut tx_dist = ScaledDistortion::zero();
let do_chroma =
Expand Down
Loading
Loading