Skip to content

Commit

Permalink
Fix for clearing DEST zero flags when it occurs on the same cycle as …
Browse files Browse the repository at this point in the history
…ZEROACC instruction (#36)

* Fix for clearing DEST zero flags when it occurs on the same cycle as ZEROACC

* Move issuing of ZEROACC to clear zero flags to math thread, and issue it per face

* Address mode is used in clear face mode, so fix it to not have any side effect
  • Loading branch information
nvelickovicTT authored Sep 24, 2024
1 parent d7a12ee commit a68322e
Show file tree
Hide file tree
Showing 2 changed files with 30 additions and 3 deletions.
4 changes: 2 additions & 2 deletions common/inc/cunpack_common.h
Original file line number Diff line number Diff line change
Expand Up @@ -272,8 +272,8 @@ namespace ckernel::unpacker

uint32_t src_zeroflags_disable = ((uint)unpA_dst_format == (uint)DataFormat::UInt16) || ((uint)unpB_dst_format == (uint)DataFormat::UInt16);
cfg_reg_rmw_tensix<ALU_ACC_CTRL_Zero_Flag_disabled_src_RMW>(src_zeroflags_disable);
//Set FP8 E4M3 mode, bit is accessible by unpacker/packer

//Set FP8 E4M3 mode, bit is accessible by unpacker/packer
if((unpA_src_format&0x1F) == (uint)DataFormat::Fp8_e4m3) {
cfg_reg_rmw_tensix<THCON_SEC0_REG1_Unp_LF8_4b_exp_RMW>(1);
}
Expand Down
29 changes: 28 additions & 1 deletion llk_lib/llk_math_eltwise_unary_datacopy.h
Original file line number Diff line number Diff line change
Expand Up @@ -20,12 +20,32 @@ inline void eltwise_unary_configure_addrmod();
template <DataCopyType type, BroadcastType src_b_bcast_type = BroadcastType::NONE, DstSync Dst = DstSync::SyncFull, bool is_fp32_dest_acc_en = false, bool unpack_to_dest = false>
inline void _llk_math_eltwise_unary_datacopy_(const std::uint32_t dst_index, const std::uint32_t src_format, const std::uint32_t dst_format) {

std::uint32_t constexpr num_faces = 4;

// For 32bit data, each half of DEST can take 16 tiles. Since dest offset is returned as if 16bit data are used, we need to
// adjust it to offset in faces for 32bit data.
std::uint32_t dest_base_offset_in_faces = get_dest_buffer_base() >> 5;
std::uint32_t dst_index_in_faces = dst_index << 2; // Each tile has 4 faces;

if (unpack_to_dest && is_32bit_input(src_format, dst_format)) {
#if SKIP_UNP == 1
#if SKIP_UNP == 1
#else
math_unpack_to_dest_math_ready();
math::set_dst_write_addr<DstTileLayout::Default, DstTileShape::Tile32x32, true>(dst_index);
math::math_unpack_to_dest_tile_ready();

// Due to a bug in Blackhole Tensix (more details in budabackend/#2730), when an event with the side effect of clearing DEST zero flags
// (such as Unpack-to-dest or RISC-to-dest) and a ZEROACC instruction from the packer occur in the same cycle,
// the zero flags clearing is dropped.
// To mitigate that, we issue an additional zero flag clear instruction immediately after the unpack of the tile to dest is done.
// RISC-to-dest event is not currently used.

#pragma GCC unroll 0
for (std::uint32_t i = 0; i < num_faces; i++)
{
// Clears zero flags in DEST for one face.
TT_ZEROACC(p_zeroacc::CLR_16, 0, 1 /*clear zero flags*/, ADDR_MOD_3, dest_base_offset_in_faces + dst_index_in_faces + i);
}
#endif
} else {

Expand Down Expand Up @@ -61,6 +81,13 @@ inline void _llk_math_eltwise_unary_datacopy_(const std::uint32_t dst_index, con

template <DataCopyType type, BroadcastType bcast_type = BroadcastType::NONE>
inline void eltwise_unary_configure_addrmod() {
addr_mod_t{
.srca = {.incr = 0},
.srcb = {.incr = 0},
.dest = {.incr = 0},
}
.set(ADDR_MOD_3);

// Use srcA for data movement
if constexpr (type == A2D) {
addr_mod_t{
Expand Down

0 comments on commit a68322e

Please sign in to comment.