Skip to content

Commit

Permalink
Enable uint8 A2D and (un)pack reconfig (#31)
Browse files: browse the repository at this point in the history.
  • Loading branch information
rdjogoTT authored Aug 19, 2024
1 parent cb61929 commit 955954c
Show file tree
Hide file tree
Showing 3 changed files with 29 additions and 5 deletions.
19 changes: 16 additions & 3 deletions common/inc/cpack_common.h
Original file line number | Diff line number | Diff line change
Expand Up @@ -289,11 +289,24 @@ namespace ckernel::packer
TTI_STALLWAIT(p_stall::STALL_CFG, p_stall::THCON);
TTI_WRCFG(p_gpr_pack::TMP_LO, p_cfg::WRCFG_32b, THCON_SEC0_REG1_Row_start_section_size_ADDR32 + 2);

uint32_t reconfig_PCK_DEST_RD_CTRL_Read_unsigned = 0;
dest_rd_ctrl_u dest_rd_ctrl;
dest_rd_ctrl.val = 0;
dest_rd_ctrl.f.PCK_DEST_RD_CTRL_Read_32b_data = (pack_src_format == (uint)DataFormat::Int8) |
(pack_src_format == (uint)DataFormat::UInt8) |
(pack_src_format == (uint)DataFormat::Int32) |
(pack_src_format == (uint)DataFormat::Float32) |
(is_fp32_dest_acc_en ? 1 : 0);
if (pack_dst_format == (uint)DataFormat::UInt8) {
reconfig_PCK_DEST_RD_CTRL_Read_unsigned = 1;
dest_rd_ctrl.f.PCK_DEST_RD_CTRL_Read_unsigned = 1;
}
//Round to 10 bit mantissa from fp32 dest
if(is_fp32_dest_acc_en && (pack_src_format!=(uint)DataFormat::Float32)) {
dest_rd_ctrl.f.PCK_DEST_RD_CTRL_Round_10b_mant = 1;
}
cfg_reg_rmw_tensix<PCK_DEST_RD_CTRL_Read_unsigned_RMW>(reconfig_PCK_DEST_RD_CTRL_Read_unsigned);
cfg_reg_rmw_tensix<PCK_DEST_RD_CTRL_Read_32b_data_ADDR32,
PCK_DEST_RD_CTRL_Read_32b_data_SHAMT,
PCK_DEST_RD_CTRL_Read_32b_data_MASK | PCK_DEST_RD_CTRL_Read_unsigned_MASK | PCK_DEST_RD_CTRL_Round_10b_mant_MASK>
(dest_rd_ctrl.val);

if (IS_BFP_FORMAT(pack_output_dst_format)) {
TTI_WRCFG(p_gpr_pack::EXP0_SEC_SIZE_BFP, p_cfg::WRCFG_32b, THCON_SEC0_REG1_Row_start_section_size_ADDR32);
Expand Down
4 changes: 2 additions & 2 deletions llk_lib/llk_math_eltwise_unary_datacopy.h
Original file line number | Diff line number | Diff line change
Expand Up @@ -121,8 +121,8 @@ inline void eltwise_unary_configure_mop(uint rows_per_inst, uint total_rows, con
uint innerloop = (rows_per_inst == p_mova2d::MOV_1_ROW) ? total_rows : (total_rows >> 3);
uint outerloop = tilize ? 1 : num_faces;

if ((is_fp32_dest_acc_en || is_int_fpu_en) && !(dst_format == (uint)DataFormat::UInt16)) {
//use elwadd to handle unpacking data into src A as fp16, but dest is in fp32 mode
if (((is_fp32_dest_acc_en || is_int_fpu_en) && !(dst_format == (uint)DataFormat::UInt16)) || (dst_format == (uint)DataFormat::UInt8)) {
// use elwadd to handle unpacking data into src A as fp16, but dest is in fp32 mode OR to handle uint8 datums
ckernel_template tmp(outerloop, innerloop, TT_OP_ELWADD(0, 0, p_elwise::SRCB_NO_BCAST, ADDR_MOD_2, 0));
tmp.set_end_op(TT_OP_SETRWC(p_setrwc::CLR_AB, 0, 0, 0, 0, p_setrwc::SET_AB));
tmp.program(instrn_buffer);
Expand Down
11 changes: 11 additions & 0 deletions llk_lib/llk_unpack_common.h
Original file line number | Diff line number | Diff line change
Expand Up @@ -82,6 +82,17 @@ inline void _llk_unpack_config_tile_dim_srcb_impl_(const std::uint32_t face_r_di

inline void _llk_unpack_reconfig_data_format_srca_impl_(const std::uint32_t unpack_src_format, const std::uint32_t unpack_dst_format, const std::uint32_t tile_size)
{
alu_config_u alu_payload = {.val = 0};
alu_payload.f.ALU_FORMAT_SPEC_REG0_SrcA = unpack_dst_format;
if ((uint)unpack_src_format == (uint)DataFormat::UInt8) {
alu_payload.f.ALU_FORMAT_SPEC_REG0_SrcAUnsigned = 1;
}
alu_payload.f.ALU_ACC_CTRL_INT8_math_enabled = ((uint)unpack_dst_format == (uint)DataFormat::Int8) ||
((uint)unpack_dst_format == (uint)DataFormat::UInt8) ||
((uint)unpack_dst_format == (uint)DataFormat::Int32);
constexpr uint alu_mask = ALU_FORMAT_SPEC_REG0_SrcA_MASK | ALU_FORMAT_SPEC_REG0_SrcAUnsigned_MASK | ALU_ACC_CTRL_INT8_math_enabled_MASK;
cfg_reg_rmw_tensix<ALU_FORMAT_SPEC_REG0_SrcA_ADDR32, 0, alu_mask>(alu_payload.val);

cfg_reg_rmw_tensix<THCON_SEC0_REG0_TileDescriptor_ADDR32, 0, 0x0f>(unpack_src_format);
cfg_reg_rmw_tensix<THCON_SEC0_REG2_Out_data_format_RMW>(unpack_dst_format);
TT_SETDMAREG(0, LOWER_HALFWORD(tile_size), 0, LO_16(p_gpr_unpack::TILE_SIZE_A)); // update gpr which holds tile size A
Expand Down

0 comments on commit 955954c

Please sign in to comment.