Skip to content

Commit

Permalink
perf: reduce MemBefore initial size (#684)
Browse files Browse the repository at this point in the history
* Reduce KERNEL size

* More

* Reduce address overhead in syscalls

* Remove gas charge todo

* Tweak range

* Apply suggestions

* Ranges

* More ranges

* Review
  • Loading branch information
Nashtare authored Oct 10, 2024
1 parent 08976ab commit e859d84
Show file tree
Hide file tree
Showing 18 changed files with 174 additions and 201 deletions.
17 changes: 8 additions & 9 deletions evm_arithmetization/src/cpu/kernel/asm/cdk_pre_execution.asm
Original file line number Diff line number Diff line change
Expand Up @@ -58,19 +58,18 @@ global update_scalable_l1blockhash:
PROVER_INPUT(ger)
// stack: l1blockhash?, retdest
DUP1 %eq_const(@U256_MAX) %jumpi(skip_and_exit)
// stack: l1blockhash, retdest
PUSH @SEGMENT_KERNEL_GENERAL
// stack: addr, l1blockhash, retdest
PUSH @GLOBAL_EXIT_ROOT_STORAGE_POS
PROVER_INPUT(ger)
// stack: root, GLOBAL_EXIT_ROOT_STORAGE_POS, l1blockhash, retdest
PUSH @SEGMENT_KERNEL_GENERAL
// stack: addr, root, GLOBAL_EXIT_ROOT_STORAGE_POS, l1blockhash, retdest
// stack: root, GLOBAL_EXIT_ROOT_STORAGE_POS, addr, l1blockhash, retdest
DUP3
// stack: addr, root, GLOBAL_EXIT_ROOT_STORAGE_POS, addr, l1blockhash, retdest
MSTORE_32BYTES_32
// stack: addr, GLOBAL_EXIT_ROOT_STORAGE_POS, l1blockhash, retdest
// stack: addr', GLOBAL_EXIT_ROOT_STORAGE_POS, addr, l1blockhash, retdest
MSTORE_32BYTES_32
// stack: addr, l1blockhash, retdest
POP
// stack: l1blockhash, retdest
PUSH 64 PUSH @SEGMENT_KERNEL_GENERAL
// stack: addr'', addr, l1blockhash, retdest
%stack (addr_2, addr) -> (addr, 64)
// stack: addr, len, l1blockhash, retdest
KECCAK_GENERAL
// stack: slot, l1blockhash, retdest
Expand Down
22 changes: 1 addition & 21 deletions evm_arithmetization/src/cpu/kernel/asm/core/gas.asm
Original file line number Diff line number Diff line change
Expand Up @@ -16,36 +16,16 @@ global sys_gas:
%endmacro


// TODO: `%refund_gas` and `refund_gas_hook` are hooks used for debugging. They should be removed at some point and `refund_gas_original` renamed to `refund_gas`.
%macro refund_gas
PUSH %%after %jump(refund_gas_hook)
%%after:
%refund_gas_original
%endmacro

global refund_gas_hook:
JUMP

%macro refund_gas_original
// stack: amount
DUP1 %journal_refund
%mload_global_metadata(@GLOBAL_METADATA_REFUND_COUNTER)
ADD
%mstore_global_metadata(@GLOBAL_METADATA_REFUND_COUNTER)
%endmacro

// TODO: `%charge_gas` and `charge_gas_hook` are hooks used for debugging. They should be removed at some point and `charge_gas_original` renamed to `charge_gas`.
%macro charge_gas
PUSH %%after %jump(charge_gas_hook)
%%after:
%charge_gas_original
%endmacro

global charge_gas_hook:
JUMP

// Charge gas. Faults if we exceed the limit for the current context.
%macro charge_gas_original
%macro charge_gas
// stack: gas, kexit_info
%shl_const(192)
ADD
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -75,47 +75,39 @@ global precompile_blake2_f:
SWAP1
// stack: t0_addr = m0_addr + 8 * 16, t_0, t_1, flag, blake2_f_contd, kexit_info

%sub_const(8)
// stack: m0_addr + 8 * (16 - 1), t_0, t_1, flag, blake2_f_contd, kexit_info

PUSH @SEGMENT_CALLDATA
GET_CONTEXT
%build_address_no_offset

%rep 16
// stack: m0_addr + 8 * (16 - i), m_(i+1), ..., m_15, t_0, t_1, flag, blake2_f_contd, kexit_info
%sub_const(8)
// stack: m0_addr + 8 * (16 - i - 1), m_(i+1), ..., m_15, t_0, t_1, flag, blake2_f_contd, kexit_info
DUP1
// stack: m0_addr + 8 * (16 - i - 1), m0_addr + 8 * (16 - i - 1), m_(i+1), ..., m_15, t_0, t_1, flag, blake2_f_contd, kexit_info
PUSH @SEGMENT_CALLDATA
// stack: @SEGMENT_CALLDATA, m0_addr + 8 * (16 - i - 1), m0_addr + 8 * (16 - i - 1), m_(i+1), ..., m_15, t_0, t_1, flag, blake2_f_contd, kexit_info
GET_CONTEXT
// stack: ctx, @SEGMENT_CALLDATA, m0_addr + 8 * (16 - i - 1), m0_addr + 8 * (16 - i - 1), m_(i+1), ..., m_15, t_0, t_1, flag, blake2_f_contd, kexit_info
%build_address
// stack: base_addr, m0_addr + 8 * (16 - i - 1), m_(i+1), ..., m_15, t_0, t_1, flag, blake2_f_contd, kexit_info
DUP2 DUP2
// stack: base_addr, m0_addr + 8 * (16 - i - 1), base_addr, m0_addr + 8 * (16 - i - 1), m_(i+1), ..., m_15, t_0, t_1, flag, blake2_f_contd, kexit_info
ADD // base_addr + offset
%mload_packing_u64_LE
// stack: m_i, m0_addr + 8 * (16 - i - 1), m_(i+1), ..., m_15, t_0, t_1, flag, blake2_f_contd, kexit_info
SWAP1
// stack: m0_addr + 8 * (16 - i - 1), m_i, m_(i+1), ..., m_15, t_0, t_1, flag, blake2_f_contd, kexit_info
// stack: m_i, base_addr, m0_addr + 8 * (16 - i - 1), m_(i+1), ..., m_15, t_0, t_1, flag, blake2_f_contd, kexit_info
SWAP2 %sub_const(8) SWAP1
// stack: base_addr, m0_addr + 8 * (16 - i - 2), m_i, m_(i+1), ..., m_15, t_0, t_1, flag, blake2_f_contd, kexit_info
%endrep
// stack: m0_addr = h0_addr + 8 * 8, m_0, ..., m_15, t_0, t_1, flag, blake2_f_contd, kexit_info
// stack: base_addr, m0_addr = h0_addr + 8 * 8, m_0, ..., m_15, t_0, t_1, flag, blake2_f_contd, kexit_info

%rep 8
// stack: h0_addr + 8 * (8 - i), h_(i+1), ..., h_7, m_0..m_15, t_0, t_1, flag, blake2_f_contd, kexit_info
%sub_const(8)
// stack: h0_addr + 8 * (8 - i - 1), h_(i+1), ..., h_7, m_0..m_15, t_0, t_1, flag, blake2_f_contd, kexit_info
DUP1
// stack: h0_addr + 8 * (8 - i), h0_addr + 8 * (8 - i), h_(i+1), ..., h_7, m_0..m_15, t_0, t_1, flag, blake2_f_contd, kexit_info
PUSH @SEGMENT_CALLDATA
// stack: @SEGMENT_CALLDATA, h0_addr + 8 * (8 - i), h0_addr + 8 * (8 - i), h_(i+1), ..., h_7, m_0..m_15, t_0, t_1, flag, blake2_f_contd, kexit_info
GET_CONTEXT
// stack: ctx, @SEGMENT_CALLDATA, h0_addr + 8 * (8 - i), h0_addr + 8 * (8 - i), h_(i+1), ..., h_7, m_0..m_15, t_0, t_1, flag, blake2_f_contd, kexit_info
%build_address
// stack: base_addr, h0_addr + 8 * (8 - i - 1), h_(i+1), ..., h_7, m_0..m_15, t_0, t_1, flag, blake2_f_contd, kexit_info
DUP2 DUP2
// stack: base_addr, h0_addr + 8 * (8 - i - 1), base_addr, h0_addr + 8 * (8 - i), h_(i+1), ..., h_7, m_0..m_15, t_0, t_1, flag, blake2_f_contd, kexit_info
ADD // base_addr + offset
%mload_packing_u64_LE
// stack: h_i, h0_addr + 8 * (8 - i), h_(i+1), ..., h_7, m_0..m_15, t_0, t_1, flag, blake2_f_contd, kexit_info
SWAP1
// stack: h0_addr + 8 * (8 - i), h_i, h_(i+1), ..., h_7, m_0..m_15, t_0, t_1, flag, blake2_f_contd, kexit_info
// stack: h_i, base_addr, h0_addr + 8 * (8 - i - 1), h_(i+1), ..., h_7, m_0..m_15, t_0, t_1, flag, blake2_f_contd, kexit_info
SWAP2 %sub_const(8) SWAP1
// stack: base_addr, h0_addr + 8 * (8 - i - 1), h_i, h_(i+1), ..., h_7, m_0..m_15, t_0, t_1, flag, blake2_f_contd, kexit_info
%endrep
// stack: h0_addr + 8 * 8 = 68, h_0, ..., h_7, m_0..m_15, t_0, t_1, flag, blake2_f_contd, kexit_info
POP

%stack () -> (@SEGMENT_CALLDATA, 4)
GET_CONTEXT
// stack: ctx, @SEGMENT_CALLDATA, 4, h_0..h_7, m_0..m_15, t_0, t_1, flag, blake2_f_contd, kexit_info
%build_address_no_offset
// stack: base_addr, garbage, h_0, ..., h_7, m_0..m_15, t_0, t_1, flag, blake2_f_contd, kexit_info
PUSH 4 SWAP2 POP
// stack: base_addr, 4, h_0, ..., h_7, m_0..m_15, t_0, t_1, flag, blake2_f_contd, kexit_info
MLOAD_32BYTES

// stack: rounds, h_0..h_7, m_0..m_15, t_0, t_1, flag, blake2_f_contd, kexit_info
Expand Down
51 changes: 24 additions & 27 deletions evm_arithmetization/src/cpu/kernel/asm/core/precompiles/bn_add.asm
Original file line number Diff line number Diff line change
Expand Up @@ -14,50 +14,47 @@ global precompile_bn_add:

%charge_gas_const(@BN_ADD_GAS)

GET_CONTEXT
PUSH @SEGMENT_CALLDATA
%build_address_no_offset
// stack: base_addr, kexit_info

// Load x0, y0, x1, y1 from the call data using `MLOAD_32BYTES`.
PUSH bn_add_return
// stack: bn_add_return, kexit_info
%stack () -> (@SEGMENT_CALLDATA, 96, 32)
GET_CONTEXT
// stack: ctx, @SEGMENT_CALLDATA, 96, 32, bn_add_return, kexit_info
%build_address
// stack: bn_add_return, base_addr, kexit_info
%stack (bn_add_return, base_addr) -> (base_addr, 96, 32, bn_add_return, base_addr)
ADD // base_addr + offset
MLOAD_32BYTES
// stack: y1, bn_add_return, kexit_info
%stack () -> (@SEGMENT_CALLDATA, 64, 32)
GET_CONTEXT
// stack: ctx, @SEGMENT_CALLDATA, 64, 32, y1, bn_add_return, kexit_info
%build_address
// stack: y1, bn_add_return, base_addr, kexit_info
%stack (y1, bn_add_return, base_addr) -> (base_addr, 64, 32, y1, bn_add_return, base_addr)
ADD // base_addr + offset
MLOAD_32BYTES
// stack: x1, y1, bn_add_return, kexit_info
%stack () -> (@SEGMENT_CALLDATA, 32, 32)
GET_CONTEXT
// stack: ctx, @SEGMENT_CALLDATA, 32, 32, x1, y1, bn_add_return, kexit_info
%build_address
// stack: x1, y1, bn_add_return, base_addr, kexit_info
%stack (x1, y1, bn_add_return, base_addr) -> (base_addr, 32, 32, x1, y1, bn_add_return, base_addr)
ADD // base_addr + offset
MLOAD_32BYTES
// stack: y0, x1, y1, bn_add_return, kexit_info
%stack () -> (@SEGMENT_CALLDATA, 32)
GET_CONTEXT
// stack: ctx, @SEGMENT_CALLDATA, 32, y0, x1, y1, bn_add_return, kexit_info
%build_address_no_offset
// stack: y0, x1, y1, bn_add_return, base_addr, kexit_info
%stack (y0, x1, y1, bn_add_return, base_addr) -> (base_addr, 32, y0, x1, y1, bn_add_return, base_addr)
MLOAD_32BYTES
// stack: x0, y0, x1, y1, bn_add_return, kexit_info
// stack: x0, y0, x1, y1, bn_add_return, base_addr, kexit_info
%jump(bn_add)
bn_add_return:
// stack: x, y, kexit_info
// stack: x, y, base_addr, kexit_info
DUP2 %eq_const(@U256_MAX) // bn_add returns (U256_MAX, U256_MAX) on bad input.
DUP2 %eq_const(@U256_MAX) // bn_add returns (U256_MAX, U256_MAX) on bad input.
MUL // Cheaper than AND
%jumpi(fault_exception)
// stack: x, y, kexit_info
// stack: x, y, base_addr, kexit_info

// Store the result (x, y) to the parent's return data using `mstore_unpacking`.
%mstore_parent_context_metadata(@CTX_METADATA_RETURNDATA_SIZE, 64)
%mload_context_metadata(@CTX_METADATA_PARENT_CONTEXT)
%stack (parent_ctx, x, y) -> (parent_ctx, @SEGMENT_RETURNDATA, x, parent_ctx, y)
%stack (parent_ctx, x, y) -> (parent_ctx, @SEGMENT_RETURNDATA, x, y)
%build_address_no_offset
// stack: addr_x, x, y, base_addr, kexit_info
MSTORE_32BYTES_32
POP
%stack (parent_ctx, y) -> (parent_ctx, @SEGMENT_RETURNDATA, 32, y)
%build_address
// stack: addr_y = addr_x + 32, y, base_addr, kexit_info
MSTORE_32BYTES_32
// stack: addr, base_addr, kexit_info
POP
%jump(pop_and_return_success)
34 changes: 17 additions & 17 deletions evm_arithmetization/src/cpu/kernel/asm/core/precompiles/bn_mul.asm
Original file line number Diff line number Diff line change
Expand Up @@ -14,30 +14,28 @@ global precompile_bn_mul:

%charge_gas_const(@BN_MUL_GAS)

GET_CONTEXT
PUSH @SEGMENT_CALLDATA
%build_address_no_offset
// stack: base_addr, kexit_info

// Load x, y, n from the call data using `MLOAD_32BYTES`.
PUSH bn_mul_return
// stack: bn_mul_return, kexit_info
%stack () -> (@SEGMENT_CALLDATA, 64, 32)
GET_CONTEXT
// stack: ctx, @SEGMENT_CALLDATA, 64, 32, bn_mul_return, kexit_info
%build_address
// stack: bn_mul_return, base_addr, kexit_info
%stack (bn_mul_return, base_addr) -> (base_addr, 64, 32, bn_mul_return, base_addr)
ADD // base_addr + offset
MLOAD_32BYTES
// stack: n, bn_mul_return, kexit_info
%stack () -> (@SEGMENT_CALLDATA, 32, 32)
GET_CONTEXT
// stack: ctx, @SEGMENT_CALLDATA, 32, 32, n, bn_mul_return, kexit_info
%build_address
// stack: n, bn_mul_return, base_addr, kexit_info
%stack (n, bn_mul_return, base_addr) -> (base_addr, 32, 32, n, bn_mul_return, base_addr)
ADD // base_addr + offset
MLOAD_32BYTES
// stack: y, n, bn_mul_return, kexit_info
%stack () -> (@SEGMENT_CALLDATA, 32)
GET_CONTEXT
// stack: ctx, @SEGMENT_CALLDATA, 32, y, n, bn_mul_return, kexit_info
%build_address_no_offset
// stack: y, n, bn_mul_return, base_addr, kexit_info
%stack (y, n, bn_mul_return, base_addr) -> (base_addr, 32, y, n, bn_mul_return, base_addr)
MLOAD_32BYTES
// stack: x, y, n, bn_mul_return, kexit_info
// stack: x, y, n, bn_mul_return, base_addr, kexit_info
%jump(bn_mul)
bn_mul_return:
// stack: Px, Py, kexit_info
// stack: Px, Py, base_addr, kexit_info
DUP2 %eq_const(@U256_MAX) // bn_mul returns (U256_MAX, U256_MAX) on bad input.
DUP2 %eq_const(@U256_MAX) // bn_mul returns (U256_MAX, U256_MAX) on bad input.
MUL // Cheaper than AND
Expand All @@ -55,4 +53,6 @@ bn_mul_contd6:
%stack (parent_ctx, Py) -> (parent_ctx, @SEGMENT_RETURNDATA, 32, Py)
%build_address
MSTORE_32BYTES_32
// stack: addr, base_addr, kexit_info
POP
%jump(pop_and_return_success)
45 changes: 23 additions & 22 deletions evm_arithmetization/src/cpu/kernel/asm/core/precompiles/ecrec.asm
Original file line number Diff line number Diff line change
Expand Up @@ -14,36 +14,33 @@ global precompile_ecrec:

%charge_gas_const(@ECREC_GAS)

GET_CONTEXT
PUSH @SEGMENT_CALLDATA
%build_address_no_offset
// stack: base_addr, kexit_info

// Load hash, v, r, s from the call data using `MLOAD_32BYTES`.
PUSH ecrec_return
// stack: ecrec_return, kexit_info
%stack () -> (@SEGMENT_CALLDATA, 96, 32)
GET_CONTEXT
// stack: ctx, @SEGMENT_CALLDATA, 96, 32, ecrec_return, kexit_info
%build_address
// stack: ecrec_return, base_addr, kexit_info

%stack (ecrec_return, base_addr) -> (base_addr, 96, 32, ecrec_return, base_addr)
ADD // base_addr + offset
MLOAD_32BYTES
// stack: s, ecrec_return, kexit_info
%stack () -> (@SEGMENT_CALLDATA, 64, 32)
GET_CONTEXT
// stack: ctx, @SEGMENT_CALLDATA, 64, 32, s, ecrec_return, kexit_info
%build_address
// stack: s, ecrec_return, base_addr, kexit_info
%stack (s, ecrec_return, base_addr) -> (base_addr, 64, 32, s, ecrec_return, base_addr)
ADD // base_addr + offset
MLOAD_32BYTES
// stack: r, s, ecrec_return, kexit_info
%stack () -> (@SEGMENT_CALLDATA, 32, 32)
GET_CONTEXT
// stack: ctx, @SEGMENT_CALLDATA, 32, 32, r, s, ecrec_return, kexit_info
%build_address
// stack: r, s, ecrec_return, base_addr, kexit_info
%stack (r, s, ecrec_return, base_addr) -> (base_addr, 32, 32, r, s, ecrec_return, base_addr)
ADD // base_addr + offset
MLOAD_32BYTES
// stack: v, r, s, ecrec_return, kexit_info
%stack () -> (@SEGMENT_CALLDATA, 32)
GET_CONTEXT
// stack: ctx, @SEGMENT_CALLDATA, 32, v, r, s, ecrec_return, kexit_info
%build_address_no_offset
// stack: v, r, s, ecrec_return, base_addr, kexit_info
%stack (v, r, s, ecrec_return, base_addr) -> (base_addr, 32, v, r, s, ecrec_return, base_addr)
MLOAD_32BYTES
// stack: hash, v, r, s, ecrec_return, kexit_info
// stack: hash, v, r, s, ecrec_return, base_addr, kexit_info
%jump(ecrecover)
ecrec_return:
// stack: address, kexit_info
// stack: address, base_addr, kexit_info
DUP1 %eq_const(@U256_MAX) %jumpi(ecrec_bad_input) // ecrecover returns U256_MAX on bad input.

// Store the result address to the parent's return data using `mstore_unpacking`.
Expand All @@ -52,9 +49,13 @@ ecrec_return:
%stack (parent_ctx, address) -> (parent_ctx, @SEGMENT_RETURNDATA, address)
%build_address_no_offset
MSTORE_32BYTES_32
// stack: addr, base_addr, kexit_info
POP
%jump(pop_and_return_success)

// On bad input, return empty return data but still return success.
// Reached from `ecrec_return` when the value returned by ecrecover equals @U256_MAX.
ecrec_bad_input:
// Record a return data size of 0 in the parent context's metadata.
// NOTE(review): assumes this macro leaves the stack unchanged — confirm against its definition.
%mstore_parent_context_metadata(@CTX_METADATA_RETURNDATA_SIZE, 0)
// stack: address (== U256_MAX sentinel), base_addr, kexit_info
// Drop the sentinel so this path has the same stack shape as the success path in `ecrec_return`.
POP
// stack: base_addr, kexit_info
// NOTE(review): `pop_and_return_success` presumably pops the remaining base_addr — confirm.
%jump(pop_and_return_success)
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,7 @@ store_limbs_return:
%macro expmod_gas_f
// stack: x
// Overflow check
DUP1 %ge_const(0x800000000000000000000000000000007) %jumpi(fault_exception)
DUP1 %gt_const(0x800000000000000000000000000000006) %jumpi(fault_exception)
// stack: x
%ceil_div_const(8)
// stack: ceil(x/8)
Expand All @@ -100,7 +100,7 @@ calculate_l_E_prime:
DUP1 %gt_const(0x100000000000000000000000000000000) %jumpi(fault_exception)
DUP1 ISZERO %jumpi(case_le_zero)
// stack: l_E, l_B, retdest
DUP1 %le_const(32)
DUP1 %lt_const(33)
// stack: l_E <= 32, l_E, l_B, retdest
%jumpi(case_le_32)
// stack: l_E, l_B, retdest
Expand All @@ -121,7 +121,7 @@ calculate_l_E_prime:
// stack: l_E, log2(i[96 + l_B..128 + l_B]), l_B, retdest
%sub_const(32)
// Overflow check
DUP1 %ge_const(0x2000000000000000000000000000000000000000000000000000000000000000) %jumpi(fault_exception)
DUP1 %gt_const(0x1fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff) %jumpi(fault_exception)
%mul_const(8)
// stack: 8 * (l_E - 32), log2(i[96 + l_B..128 + l_B]), l_B, retdest
ADD
Expand Down
2 changes: 1 addition & 1 deletion evm_arithmetization/src/cpu/kernel/asm/core/terminate.asm
Original file line number Diff line number Diff line change
Expand Up @@ -193,7 +193,7 @@ revert_after_gas:
%stack (addr, size, parent_ctx, kexit_info) ->
(
parent_ctx, @SEGMENT_RETURNDATA, // DST
addr, // SRC
addr, // SRC
size, sys_revert_finish, kexit_info // count, retdest, ...
)
%build_address_no_offset
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@ global bn_glv_decompose:
// along with a flag `underflow` set to 1 if there is an underflow, 0 otherwise.
ADD %bn_sub_check_underflow
// stack: k2, underflow, N, k, retdest
DUP1 %ge_const(0x80000000000000000000000000000000) %jumpi(negate)
DUP1 %gt_const(0x7fffffffffffffffffffffffffffffff) %jumpi(negate)
%jump(contd)
negate:
// stack: k2, underflow, N, k, retdest
Expand Down
Loading

0 comments on commit e859d84

Please sign in to comment.