diff --git a/xmrstak/backend/cpu/crypto/cryptonight_aesni.h b/xmrstak/backend/cpu/crypto/cryptonight_aesni.h
index e15c474a6..78bcd33b0 100644
--- a/xmrstak/backend/cpu/crypto/cryptonight_aesni.h
+++ b/xmrstak/backend/cpu/crypto/cryptonight_aesni.h
@@ -724,12 +724,18 @@ void cryptonight_double_hash(const void* input, size_t len, void* output, crypto
 	extra_hashes[ctx[1]->hash_state[0] & 3](ctx[1]->hash_state, 200, (char*)output + 32);
 }
 
-#define CN_STEP1(a, b, c, l, ptr, idx)	\
+#define CN_STEP1_A(a, b, c, l, ptr, idx)	\
 	ptr = (__m128i *)&l[idx & MASK];	\
 	if(PREFETCH)	\
-		_mm_prefetch((const char*)ptr, _MM_HINT_T0);	\
+		_mm_prefetch((const char*)ptr, _MM_HINT_T0);
+
+#define CN_STEP1_B(a, b, c, l, ptr, idx)	\
 	c = _mm_load_si128(ptr);
 
+#define CN_STEP1(a, b, c, l, ptr, idx) \
+	CN_STEP1_A(a, b, c, l, ptr, idx) \
+	CN_STEP1_B(a, b, c, l, ptr, idx)
+
 #define CN_STEP2(a, b, c, l, ptr, idx)	\
 	if(SOFT_AES)	\
 		c = soft_aesenc(c, a);	\
@@ -741,13 +747,19 @@ void cryptonight_double_hash(const void* input, size_t len, void* output, crypto
 	else \
 		_mm_store_si128(ptr, b);\
 
-#define CN_STEP3(a, b, c, l, ptr, idx)	\
+#define CN_STEP3_A(a, b, c, l, ptr, idx)	\
 	idx = _mm_cvtsi128_si64(c);	\
 	ptr = (__m128i *)&l[idx & MASK];	\
 	if(PREFETCH)	\
-		_mm_prefetch((const char*)ptr, _MM_HINT_T0);	\
+		_mm_prefetch((const char*)ptr, _MM_HINT_T0);
+
+#define CN_STEP3_B(a, b, c, l, ptr, idx)	\
 	b = _mm_load_si128(ptr);
 
+#define CN_STEP3(a, b, c, l, ptr, idx) \
+	CN_STEP3_A(a, b, c, l, ptr, idx) \
+	CN_STEP3_B(a, b, c, l, ptr, idx)
+
 #define CN_STEP4(a, b, c, l, mc, ptr, idx)	\
 	lo = _umul128(idx, _mm_cvtsi128_si64(b), &hi);	\
 	a = _mm_add_epi64(a, _mm_set_epi64x(lo, hi));	\
@@ -770,6 +782,25 @@ void cryptonight_double_hash(const void* input, size_t len, void* output, crypto
 		idx = d ^ q; \
 	}
 
+// Pipelined step macros. Each one begins with the load half (*_B) of an access
+// whose address/prefetch half (*_A) was issued by the macro that ran before it.
+// CN2_STEP1: finish the previous round (b and c swapped), then start the next one.
+#define CN2_STEP1(a, b, c, l, mc, ptr, idx) \
+	CN_STEP3_B(a, c, b, l, ptr, idx) \
+	CN_STEP4(a, c, b, l, mc, ptr, idx) \
+	CN_STEP1_A(a, b, c, l, ptr, idx)
+
+// CN2_STEP2: load for step 1, run step 2, issue address/prefetch for step 3.
+#define CN2_STEP2(a, b, c, l, mc, ptr, idx) \
+	CN_STEP1_B(a, b, c, l, ptr, idx) \
+	CN_STEP2(a, b, c, l, ptr, idx) \
+	CN_STEP3_A(a, b, c, l, ptr, idx)
+
+// CN2_STEP3: pipeline epilogue - finish the last round without starting another.
+#define CN2_STEP3(a, b, c, l, mc, ptr, idx) \
+	CN_STEP3_B(a, b, c, l, ptr, idx) \
+	CN_STEP4(a, b, c, l, mc, ptr, idx)
+
 #define CONST_INIT(ctx, n) \
 	__m128i mc##n = _mm_set_epi64x(*reinterpret_cast(reinterpret_cast(input) + n * len + 35) ^ \
 		*(reinterpret_cast((ctx)->hash_state) + 24), 0);
@@ -1100,3 +1131,413 @@ void cryptonight_penta_hash(const void* input, size_t len, void* output, crypton
 		extra_hashes[ctx[i]->hash_state[0] & 3](ctx[i]->hash_state, 200, (char*)output + 32 * i);
 	}
 }
+
+// Computes 20 CryptoNight hashes in one call. The main-loop steps of all 20
+// lanes are interleaved through the CN2_STEP* macros, so the other 19 lanes'
+// work sits between a lane's prefetch and the load that consumes it.
+//
+// input  - 20 messages of len bytes each, back to back (lane i reads input + len * i)
+// len    - length of a single message in bytes
+// output - lane i's result is written at output + 32 * i
+// ctx    - 20 contexts; lane i uses ctx[i]->hash_state and ctx[i]->long_state
+//
+// For cryptonight_monero and cryptonight_aeon, len < 43 is rejected: all 20
+// output slots are zeroed and the function returns without hashing.
+//
+// Original author's note:
+// this seems to improve PREFETCH performance on a few CPUs that I tried with monero7
+//
+// Script that prints the unrolled CONST_INIT lines (presumably how the unrolled
+// blocks below were produced):
+//
+// #! /bin/sh
+// INDEX=0
+// while [ "$INDEX" -lt 20 ]
+// do
+//   echo "CONST_INIT(ctx[$INDEX], $INDEX);"
+//   INDEX=`expr $INDEX + 1`
+// done
+//
+// feel free to clean up the macros and comments if you want it
+//
+// 452YzXHGTKVf6a9zbqaSBLPHeNsZVTvkhLPUv2hn6oRgXNP95ikasL64nC8oeqXmMSbKTeMfPbVHNfF8otAuCqHXEEWVxxw
+//
+template
+void cryptonight_twenty_hash(const void* input, size_t len, void* output, cryptonight_ctx** ctx)
+{
+	constexpr size_t MASK = cn_select_mask();
+	constexpr size_t ITERATIONS = cn_select_iter();
+	constexpr size_t MEM = cn_select_memory();
+
+	if((ALGO == cryptonight_monero || ALGO == cryptonight_aeon) && len < 43)
+	{
+		// Zero all 20 output slots (32 bytes per lane).
+		memset(output, 0, 32 * 20);
+		return;
+	}
+
+	// Per-lane setup: keccak over lane i's message, then cn_explode_scratchpad.
+	for (size_t i = 0; i < 20; i++)
+	{
+		keccak((const uint8_t *)input + len * i, len, ctx[i]->hash_state, 200);
+		cn_explode_scratchpad((__m128i*)ctx[i]->hash_state, (__m128i*)ctx[i]->long_state);
+	}
+
+	CONST_INIT(ctx[0], 0);
+	CONST_INIT(ctx[1], 1);
+	CONST_INIT(ctx[2], 2);
+	CONST_INIT(ctx[3], 3);
+	CONST_INIT(ctx[4], 4);
+	CONST_INIT(ctx[5], 5);
+	CONST_INIT(ctx[6], 6);
+	CONST_INIT(ctx[7], 7);
+	CONST_INIT(ctx[8], 8);
+	CONST_INIT(ctx[9], 9);
+	CONST_INIT(ctx[10], 10);
+	CONST_INIT(ctx[11], 11);
+	CONST_INIT(ctx[12], 12);
+	CONST_INIT(ctx[13], 13);
+	CONST_INIT(ctx[14], 14);
+	CONST_INIT(ctx[15], 15);
+	CONST_INIT(ctx[16], 16);
+	CONST_INIT(ctx[17], 17);
+	CONST_INIT(ctx[18], 18);
+	CONST_INIT(ctx[19], 19);
+
+	uint8_t* l0 = ctx[0]->long_state;
+	uint8_t* l1 = ctx[1]->long_state;
+	uint8_t* l2 = ctx[2]->long_state;
+	uint8_t* l3 = ctx[3]->long_state;
+	uint8_t* l4 = ctx[4]->long_state;
+	uint8_t* l5 = ctx[5]->long_state;
+	uint8_t* l6 = ctx[6]->long_state;
+	uint8_t* l7 = ctx[7]->long_state;
+	uint8_t* l8 = ctx[8]->long_state;
+	uint8_t* l9 = ctx[9]->long_state;
+	uint8_t* l10 = ctx[10]->long_state;
+	uint8_t* l11 = ctx[11]->long_state;
+	uint8_t* l12 = ctx[12]->long_state;
+	uint8_t* l13 = ctx[13]->long_state;
+	uint8_t* l14 = ctx[14]->long_state;
+	uint8_t* l15 = ctx[15]->long_state;
+	uint8_t* l16 = ctx[16]->long_state;
+	uint8_t* l17 = ctx[17]->long_state;
+	uint8_t* l18 = ctx[18]->long_state;
+	uint8_t* l19 = ctx[19]->long_state;
+
+	uint64_t* h0 = (uint64_t*)ctx[0]->hash_state;
+	uint64_t* h1 = (uint64_t*)ctx[1]->hash_state;
+	uint64_t* h2 = (uint64_t*)ctx[2]->hash_state;
+	uint64_t* h3 = (uint64_t*)ctx[3]->hash_state;
+	uint64_t* h4 = (uint64_t*)ctx[4]->hash_state;
+	uint64_t* h5 = (uint64_t*)ctx[5]->hash_state;
+	uint64_t* h6 = (uint64_t*)ctx[6]->hash_state;
+	uint64_t* h7 = (uint64_t*)ctx[7]->hash_state;
+	uint64_t* h8 = (uint64_t*)ctx[8]->hash_state;
+	uint64_t* h9 = (uint64_t*)ctx[9]->hash_state;
+	uint64_t* h10 = (uint64_t*)ctx[10]->hash_state;
+	uint64_t* h11 = (uint64_t*)ctx[11]->hash_state;
+	uint64_t* h12 = (uint64_t*)ctx[12]->hash_state;
+	uint64_t* h13 = (uint64_t*)ctx[13]->hash_state;
+	uint64_t* h14 = (uint64_t*)ctx[14]->hash_state;
+	uint64_t* h15 = (uint64_t*)ctx[15]->hash_state;
+	uint64_t* h16 = (uint64_t*)ctx[16]->hash_state;
+	uint64_t* h17 = (uint64_t*)ctx[17]->hash_state;
+	uint64_t* h18 = (uint64_t*)ctx[18]->hash_state;
+	uint64_t* h19 = (uint64_t*)ctx[19]->hash_state;
+
+	__m128i ax0 = _mm_set_epi64x(h0[1] ^ h0[5], h0[0] ^ h0[4]);
+	__m128i ax1 = _mm_set_epi64x(h1[1] ^ h1[5], h1[0] ^ h1[4]);
+	__m128i ax2 = _mm_set_epi64x(h2[1] ^ h2[5], h2[0] ^ h2[4]);
+	__m128i ax3 = _mm_set_epi64x(h3[1] ^ h3[5], h3[0] ^ h3[4]);
+	__m128i ax4 = _mm_set_epi64x(h4[1] ^ h4[5], h4[0] ^ h4[4]);
+	__m128i ax5 = _mm_set_epi64x(h5[1] ^ h5[5], h5[0] ^ h5[4]);
+	__m128i ax6 = _mm_set_epi64x(h6[1] ^ h6[5], h6[0] ^ h6[4]);
+	__m128i ax7 = _mm_set_epi64x(h7[1] ^ h7[5], h7[0] ^ h7[4]);
+	__m128i ax8 = _mm_set_epi64x(h8[1] ^ h8[5], h8[0] ^ h8[4]);
+	__m128i ax9 = _mm_set_epi64x(h9[1] ^ h9[5], h9[0] ^ h9[4]);
+	__m128i ax10 = _mm_set_epi64x(h10[1] ^ h10[5], h10[0] ^ h10[4]);
+	__m128i ax11 = _mm_set_epi64x(h11[1] ^ h11[5], h11[0] ^ h11[4]);
+	__m128i ax12 = _mm_set_epi64x(h12[1] ^ h12[5], h12[0] ^ h12[4]);
+	__m128i ax13 = _mm_set_epi64x(h13[1] ^ h13[5], h13[0] ^ h13[4]);
+	__m128i ax14 = _mm_set_epi64x(h14[1] ^ h14[5], h14[0] ^ h14[4]);
+	__m128i ax15 = _mm_set_epi64x(h15[1] ^ h15[5], h15[0] ^ h15[4]);
+	__m128i ax16 = _mm_set_epi64x(h16[1] ^ h16[5], h16[0] ^ h16[4]);
+	__m128i ax17 = _mm_set_epi64x(h17[1] ^ h17[5], h17[0] ^ h17[4]);
+	__m128i ax18 = _mm_set_epi64x(h18[1] ^ h18[5], h18[0] ^ h18[4]);
+	__m128i ax19 = _mm_set_epi64x(h19[1] ^ h19[5], h19[0] ^ h19[4]);
+
+	__m128i bx0 = _mm_set_epi64x(h0[3] ^ h0[7], h0[2] ^ h0[6]);
+	__m128i bx1 = _mm_set_epi64x(h1[3] ^ h1[7], h1[2] ^ h1[6]);
+	__m128i bx2 = _mm_set_epi64x(h2[3] ^ h2[7], h2[2] ^ h2[6]);
+	__m128i bx3 = _mm_set_epi64x(h3[3] ^ h3[7], h3[2] ^ h3[6]);
+	__m128i bx4 = _mm_set_epi64x(h4[3] ^ h4[7], h4[2] ^ h4[6]);
+	__m128i bx5 = _mm_set_epi64x(h5[3] ^ h5[7], h5[2] ^ h5[6]);
+	__m128i bx6 = _mm_set_epi64x(h6[3] ^ h6[7], h6[2] ^ h6[6]);
+	__m128i bx7 = _mm_set_epi64x(h7[3] ^ h7[7], h7[2] ^ h7[6]);
+	__m128i bx8 = _mm_set_epi64x(h8[3] ^ h8[7], h8[2] ^ h8[6]);
+	__m128i bx9 = _mm_set_epi64x(h9[3] ^ h9[7], h9[2] ^ h9[6]);
+	__m128i bx10 = _mm_set_epi64x(h10[3] ^ h10[7], h10[2] ^ h10[6]);
+	__m128i bx11 = _mm_set_epi64x(h11[3] ^ h11[7], h11[2] ^ h11[6]);
+	__m128i bx12 = _mm_set_epi64x(h12[3] ^ h12[7], h12[2] ^ h12[6]);
+	__m128i bx13 = _mm_set_epi64x(h13[3] ^ h13[7], h13[2] ^ h13[6]);
+	__m128i bx14 = _mm_set_epi64x(h14[3] ^ h14[7], h14[2] ^ h14[6]);
+	__m128i bx15 = _mm_set_epi64x(h15[3] ^ h15[7], h15[2] ^ h15[6]);
+	__m128i bx16 = _mm_set_epi64x(h16[3] ^ h16[7], h16[2] ^ h16[6]);
+	__m128i bx17 = _mm_set_epi64x(h17[3] ^ h17[7], h17[2] ^ h17[6]);
+	__m128i bx18 = _mm_set_epi64x(h18[3] ^ h18[7], h18[2] ^ h18[6]);
+	__m128i bx19 = _mm_set_epi64x(h19[3] ^ h19[7], h19[2] ^ h19[6]);
+
+	__m128i cx0 = _mm_set_epi64x(0, 0);
+	__m128i cx1 = _mm_set_epi64x(0, 0);
+	__m128i cx2 = _mm_set_epi64x(0, 0);
+	__m128i cx3 = _mm_set_epi64x(0, 0);
+	__m128i cx4 = _mm_set_epi64x(0, 0);
+	__m128i cx5 = _mm_set_epi64x(0, 0);
+	__m128i cx6 = _mm_set_epi64x(0, 0);
+	__m128i cx7 = _mm_set_epi64x(0, 0);
+	__m128i cx8 = _mm_set_epi64x(0, 0);
+	__m128i cx9 = _mm_set_epi64x(0, 0);
+	__m128i cx10 = _mm_set_epi64x(0, 0);
+	__m128i cx11 = _mm_set_epi64x(0, 0);
+	__m128i cx12 = _mm_set_epi64x(0, 0);
+	__m128i cx13 = _mm_set_epi64x(0, 0);
+	__m128i cx14 = _mm_set_epi64x(0, 0);
+	__m128i cx15 = _mm_set_epi64x(0, 0);
+	__m128i cx16 = _mm_set_epi64x(0, 0);
+	__m128i cx17 = _mm_set_epi64x(0, 0);
+	__m128i cx18 = _mm_set_epi64x(0, 0);
+	__m128i cx19 = _mm_set_epi64x(0, 0);
+
+	uint64_t idx0 = _mm_cvtsi128_si64(ax0);
+	uint64_t idx1 = _mm_cvtsi128_si64(ax1);
+	uint64_t idx2 = _mm_cvtsi128_si64(ax2);
+	uint64_t idx3 = _mm_cvtsi128_si64(ax3);
+	uint64_t idx4 = _mm_cvtsi128_si64(ax4);
+	uint64_t idx5 = _mm_cvtsi128_si64(ax5);
+	uint64_t idx6 = _mm_cvtsi128_si64(ax6);
+	uint64_t idx7 = _mm_cvtsi128_si64(ax7);
+	uint64_t idx8 = _mm_cvtsi128_si64(ax8);
+	uint64_t idx9 = _mm_cvtsi128_si64(ax9);
+	uint64_t idx10 = _mm_cvtsi128_si64(ax10);
+	uint64_t idx11 = _mm_cvtsi128_si64(ax11);
+	uint64_t idx12 = _mm_cvtsi128_si64(ax12);
+	uint64_t idx13 = _mm_cvtsi128_si64(ax13);
+	uint64_t idx14 = _mm_cvtsi128_si64(ax14);
+	uint64_t idx15 = _mm_cvtsi128_si64(ax15);
+	uint64_t idx16 = _mm_cvtsi128_si64(ax16);
+	uint64_t idx17 = _mm_cvtsi128_si64(ax17);
+	uint64_t idx18 = _mm_cvtsi128_si64(ax18);
+	uint64_t idx19 = _mm_cvtsi128_si64(ax19);
+
+	__m128i *ptr0, *ptr1, *ptr2, *ptr3, *ptr4, *ptr5, *ptr6, *ptr7, *ptr8, *ptr9;
+	__m128i *ptr10, *ptr11, *ptr12, *ptr13, *ptr14, *ptr15, *ptr16, *ptr17, *ptr18, *ptr19;
+
+	uint64_t hi, lo;
+
+	// Pipeline prologue: compute the first scratchpad address of every lane
+	// (and prefetch it when PREFETCH is set).
+	CN_STEP1_A(ax0, bx0, cx0, l0, ptr0, idx0);
+	CN_STEP1_A(ax1, bx1, cx1, l1, ptr1, idx1);
+	CN_STEP1_A(ax2, bx2, cx2, l2, ptr2, idx2);
+	CN_STEP1_A(ax3, bx3, cx3, l3, ptr3, idx3);
+	CN_STEP1_A(ax4, bx4, cx4, l4, ptr4, idx4);
+	CN_STEP1_A(ax5, bx5, cx5, l5, ptr5, idx5);
+	CN_STEP1_A(ax6, bx6, cx6, l6, ptr6, idx6);
+	CN_STEP1_A(ax7, bx7, cx7, l7, ptr7, idx7);
+	CN_STEP1_A(ax8, bx8, cx8, l8, ptr8, idx8);
+	CN_STEP1_A(ax9, bx9, cx9, l9, ptr9, idx9);
+	CN_STEP1_A(ax10, bx10, cx10, l10, ptr10, idx10);
+	CN_STEP1_A(ax11, bx11, cx11, l11, ptr11, idx11);
+	CN_STEP1_A(ax12, bx12, cx12, l12, ptr12, idx12);
+	CN_STEP1_A(ax13, bx13, cx13, l13, ptr13, idx13);
+	CN_STEP1_A(ax14, bx14, cx14, l14, ptr14, idx14);
+	CN_STEP1_A(ax15, bx15, cx15, l15, ptr15, idx15);
+	CN_STEP1_A(ax16, bx16, cx16, l16, ptr16, idx16);
+	CN_STEP1_A(ax17, bx17, cx17, l17, ptr17, idx17);
+	CN_STEP1_A(ax18, bx18, cx18, l18, ptr18, idx18);
+	CN_STEP1_A(ax19, bx19, cx19, l19, ptr19, idx19);
+
+	// First two rounds, peeled out of the loop: round 1 has no earlier round to finish.
+	CN2_STEP2(ax0, bx0, cx0, l0, mc0, ptr0, idx0);
+	CN2_STEP2(ax1, bx1, cx1, l1, mc1, ptr1, idx1);
+	CN2_STEP2(ax2, bx2, cx2, l2, mc2, ptr2, idx2);
+	CN2_STEP2(ax3, bx3, cx3, l3, mc3, ptr3, idx3);
+	CN2_STEP2(ax4, bx4, cx4, l4, mc4, ptr4, idx4);
+	CN2_STEP2(ax5, bx5, cx5, l5, mc5, ptr5, idx5);
+	CN2_STEP2(ax6, bx6, cx6, l6, mc6, ptr6, idx6);
+	CN2_STEP2(ax7, bx7, cx7, l7, mc7, ptr7, idx7);
+	CN2_STEP2(ax8, bx8, cx8, l8, mc8, ptr8, idx8);
+	CN2_STEP2(ax9, bx9, cx9, l9, mc9, ptr9, idx9);
+	CN2_STEP2(ax10, bx10, cx10, l10, mc10, ptr10, idx10);
+	CN2_STEP2(ax11, bx11, cx11, l11, mc11, ptr11, idx11);
+	CN2_STEP2(ax12, bx12, cx12, l12, mc12, ptr12, idx12);
+	CN2_STEP2(ax13, bx13, cx13, l13, mc13, ptr13, idx13);
+	CN2_STEP2(ax14, bx14, cx14, l14, mc14, ptr14, idx14);
+	CN2_STEP2(ax15, bx15, cx15, l15, mc15, ptr15, idx15);
+	CN2_STEP2(ax16, bx16, cx16, l16, mc16, ptr16, idx16);
+	CN2_STEP2(ax17, bx17, cx17, l17, mc17, ptr17, idx17);
+	CN2_STEP2(ax18, bx18, cx18, l18, mc18, ptr18, idx18);
+	CN2_STEP2(ax19, bx19, cx19, l19, mc19, ptr19, idx19);
+
+	CN2_STEP1(ax0, cx0, bx0, l0, mc0, ptr0, idx0);
+	CN2_STEP1(ax1, cx1, bx1, l1, mc1, ptr1, idx1);
+	CN2_STEP1(ax2, cx2, bx2, l2, mc2, ptr2, idx2);
+	CN2_STEP1(ax3, cx3, bx3, l3, mc3, ptr3, idx3);
+	CN2_STEP1(ax4, cx4, bx4, l4, mc4, ptr4, idx4);
+	CN2_STEP1(ax5, cx5, bx5, l5, mc5, ptr5, idx5);
+	CN2_STEP1(ax6, cx6, bx6, l6, mc6, ptr6, idx6);
+	CN2_STEP1(ax7, cx7, bx7, l7, mc7, ptr7, idx7);
+	CN2_STEP1(ax8, cx8, bx8, l8, mc8, ptr8, idx8);
+	CN2_STEP1(ax9, cx9, bx9, l9, mc9, ptr9, idx9);
+	CN2_STEP1(ax10, cx10, bx10, l10, mc10, ptr10, idx10);
+	CN2_STEP1(ax11, cx11, bx11, l11, mc11, ptr11, idx11);
+	CN2_STEP1(ax12, cx12, bx12, l12, mc12, ptr12, idx12);
+	CN2_STEP1(ax13, cx13, bx13, l13, mc13, ptr13, idx13);
+	CN2_STEP1(ax14, cx14, bx14, l14, mc14, ptr14, idx14);
+	CN2_STEP1(ax15, cx15, bx15, l15, mc15, ptr15, idx15);
+	CN2_STEP1(ax16, cx16, bx16, l16, mc16, ptr16, idx16);
+	CN2_STEP1(ax17, cx17, bx17, l17, mc17, ptr17, idx17);
+	CN2_STEP1(ax18, cx18, bx18, l18, mc18, ptr18, idx18);
+	CN2_STEP1(ax19, cx19, bx19, l19, mc19, ptr19, idx19);
+
+	CN2_STEP2(ax0, cx0, bx0, l0, mc0, ptr0, idx0);
+	CN2_STEP2(ax1, cx1, bx1, l1, mc1, ptr1, idx1);
+	CN2_STEP2(ax2, cx2, bx2, l2, mc2, ptr2, idx2);
+	CN2_STEP2(ax3, cx3, bx3, l3, mc3, ptr3, idx3);
+	CN2_STEP2(ax4, cx4, bx4, l4, mc4, ptr4, idx4);
+	CN2_STEP2(ax5, cx5, bx5, l5, mc5, ptr5, idx5);
+	CN2_STEP2(ax6, cx6, bx6, l6, mc6, ptr6, idx6);
+	CN2_STEP2(ax7, cx7, bx7, l7, mc7, ptr7, idx7);
+	CN2_STEP2(ax8, cx8, bx8, l8, mc8, ptr8, idx8);
+	CN2_STEP2(ax9, cx9, bx9, l9, mc9, ptr9, idx9);
+	CN2_STEP2(ax10, cx10, bx10, l10, mc10, ptr10, idx10);
+	CN2_STEP2(ax11, cx11, bx11, l11, mc11, ptr11, idx11);
+	CN2_STEP2(ax12, cx12, bx12, l12, mc12, ptr12, idx12);
+	CN2_STEP2(ax13, cx13, bx13, l13, mc13, ptr13, idx13);
+	CN2_STEP2(ax14, cx14, bx14, l14, mc14, ptr14, idx14);
+	CN2_STEP2(ax15, cx15, bx15, l15, mc15, ptr15, idx15);
+	CN2_STEP2(ax16, cx16, bx16, l16, mc16, ptr16, idx16);
+	CN2_STEP2(ax17, cx17, bx17, l17, mc17, ptr17, idx17);
+	CN2_STEP2(ax18, cx18, bx18, l18, mc18, ptr18, idx18);
+	CN2_STEP2(ax19, cx19, bx19, l19, mc19, ptr19, idx19);
+
+	// Remaining rounds, two per pass: the second pair of macros swaps the roles
+	// of bx and cx.
+	for (size_t i = 1; i < ITERATIONS/2; i++)
+	{
+		CN2_STEP1(ax0, bx0, cx0, l0, mc0, ptr0, idx0);
+		CN2_STEP1(ax1, bx1, cx1, l1, mc1, ptr1, idx1);
+		CN2_STEP1(ax2, bx2, cx2, l2, mc2, ptr2, idx2);
+		CN2_STEP1(ax3, bx3, cx3, l3, mc3, ptr3, idx3);
+		CN2_STEP1(ax4, bx4, cx4, l4, mc4, ptr4, idx4);
+		CN2_STEP1(ax5, bx5, cx5, l5, mc5, ptr5, idx5);
+		CN2_STEP1(ax6, bx6, cx6, l6, mc6, ptr6, idx6);
+		CN2_STEP1(ax7, bx7, cx7, l7, mc7, ptr7, idx7);
+		CN2_STEP1(ax8, bx8, cx8, l8, mc8, ptr8, idx8);
+		CN2_STEP1(ax9, bx9, cx9, l9, mc9, ptr9, idx9);
+		CN2_STEP1(ax10, bx10, cx10, l10, mc10, ptr10, idx10);
+		CN2_STEP1(ax11, bx11, cx11, l11, mc11, ptr11, idx11);
+		CN2_STEP1(ax12, bx12, cx12, l12, mc12, ptr12, idx12);
+		CN2_STEP1(ax13, bx13, cx13, l13, mc13, ptr13, idx13);
+		CN2_STEP1(ax14, bx14, cx14, l14, mc14, ptr14, idx14);
+		CN2_STEP1(ax15, bx15, cx15, l15, mc15, ptr15, idx15);
+		CN2_STEP1(ax16, bx16, cx16, l16, mc16, ptr16, idx16);
+		CN2_STEP1(ax17, bx17, cx17, l17, mc17, ptr17, idx17);
+		CN2_STEP1(ax18, bx18, cx18, l18, mc18, ptr18, idx18);
+		CN2_STEP1(ax19, bx19, cx19, l19, mc19, ptr19, idx19);
+
+		CN2_STEP2(ax0, bx0, cx0, l0, mc0, ptr0, idx0);
+		CN2_STEP2(ax1, bx1, cx1, l1, mc1, ptr1, idx1);
+		CN2_STEP2(ax2, bx2, cx2, l2, mc2, ptr2, idx2);
+		CN2_STEP2(ax3, bx3, cx3, l3, mc3, ptr3, idx3);
+		CN2_STEP2(ax4, bx4, cx4, l4, mc4, ptr4, idx4);
+		CN2_STEP2(ax5, bx5, cx5, l5, mc5, ptr5, idx5);
+		CN2_STEP2(ax6, bx6, cx6, l6, mc6, ptr6, idx6);
+		CN2_STEP2(ax7, bx7, cx7, l7, mc7, ptr7, idx7);
+		CN2_STEP2(ax8, bx8, cx8, l8, mc8, ptr8, idx8);
+		CN2_STEP2(ax9, bx9, cx9, l9, mc9, ptr9, idx9);
+		CN2_STEP2(ax10, bx10, cx10, l10, mc10, ptr10, idx10);
+		CN2_STEP2(ax11, bx11, cx11, l11, mc11, ptr11, idx11);
+		CN2_STEP2(ax12, bx12, cx12, l12, mc12, ptr12, idx12);
+		CN2_STEP2(ax13, bx13, cx13, l13, mc13, ptr13, idx13);
+		CN2_STEP2(ax14, bx14, cx14, l14, mc14, ptr14, idx14);
+		CN2_STEP2(ax15, bx15, cx15, l15, mc15, ptr15, idx15);
+		CN2_STEP2(ax16, bx16, cx16, l16, mc16, ptr16, idx16);
+		CN2_STEP2(ax17, bx17, cx17, l17, mc17, ptr17, idx17);
+		CN2_STEP2(ax18, bx18, cx18, l18, mc18, ptr18, idx18);
+		CN2_STEP2(ax19, bx19, cx19, l19, mc19, ptr19, idx19);
+
+		CN2_STEP1(ax0, cx0, bx0, l0, mc0, ptr0, idx0);
+		CN2_STEP1(ax1, cx1, bx1, l1, mc1, ptr1, idx1);
+		CN2_STEP1(ax2, cx2, bx2, l2, mc2, ptr2, idx2);
+		CN2_STEP1(ax3, cx3, bx3, l3, mc3, ptr3, idx3);
+		CN2_STEP1(ax4, cx4, bx4, l4, mc4, ptr4, idx4);
+		CN2_STEP1(ax5, cx5, bx5, l5, mc5, ptr5, idx5);
+		CN2_STEP1(ax6, cx6, bx6, l6, mc6, ptr6, idx6);
+		CN2_STEP1(ax7, cx7, bx7, l7, mc7, ptr7, idx7);
+		CN2_STEP1(ax8, cx8, bx8, l8, mc8, ptr8, idx8);
+		CN2_STEP1(ax9, cx9, bx9, l9, mc9, ptr9, idx9);
+		CN2_STEP1(ax10, cx10, bx10, l10, mc10, ptr10, idx10);
+		CN2_STEP1(ax11, cx11, bx11, l11, mc11, ptr11, idx11);
+		CN2_STEP1(ax12, cx12, bx12, l12, mc12, ptr12, idx12);
+		CN2_STEP1(ax13, cx13, bx13, l13, mc13, ptr13, idx13);
+		CN2_STEP1(ax14, cx14, bx14, l14, mc14, ptr14, idx14);
+		CN2_STEP1(ax15, cx15, bx15, l15, mc15, ptr15, idx15);
+		CN2_STEP1(ax16, cx16, bx16, l16, mc16, ptr16, idx16);
+		CN2_STEP1(ax17, cx17, bx17, l17, mc17, ptr17, idx17);
+		CN2_STEP1(ax18, cx18, bx18, l18, mc18, ptr18, idx18);
+		CN2_STEP1(ax19, cx19, bx19, l19, mc19, ptr19, idx19);
+
+		CN2_STEP2(ax0, cx0, bx0, l0, mc0, ptr0, idx0);
+		CN2_STEP2(ax1, cx1, bx1, l1, mc1, ptr1, idx1);
+		CN2_STEP2(ax2, cx2, bx2, l2, mc2, ptr2, idx2);
+		CN2_STEP2(ax3, cx3, bx3, l3, mc3, ptr3, idx3);
+		CN2_STEP2(ax4, cx4, bx4, l4, mc4, ptr4, idx4);
+		CN2_STEP2(ax5, cx5, bx5, l5, mc5, ptr5, idx5);
+		CN2_STEP2(ax6, cx6, bx6, l6, mc6, ptr6, idx6);
+		CN2_STEP2(ax7, cx7, bx7, l7, mc7, ptr7, idx7);
+		CN2_STEP2(ax8, cx8, bx8, l8, mc8, ptr8, idx8);
+		CN2_STEP2(ax9, cx9, bx9, l9, mc9, ptr9, idx9);
+		CN2_STEP2(ax10, cx10, bx10, l10, mc10, ptr10, idx10);
+		CN2_STEP2(ax11, cx11, bx11, l11, mc11, ptr11, idx11);
+		CN2_STEP2(ax12, cx12, bx12, l12, mc12, ptr12, idx12);
+		CN2_STEP2(ax13, cx13, bx13, l13, mc13, ptr13, idx13);
+		CN2_STEP2(ax14, cx14, bx14, l14, mc14, ptr14, idx14);
+		CN2_STEP2(ax15, cx15, bx15, l15, mc15, ptr15, idx15);
+		CN2_STEP2(ax16, cx16, bx16, l16, mc16, ptr16, idx16);
+		CN2_STEP2(ax17, cx17, bx17, l17, mc17, ptr17, idx17);
+		CN2_STEP2(ax18, cx18, bx18, l18, mc18, ptr18, idx18);
+		CN2_STEP2(ax19, cx19, bx19, l19, mc19, ptr19, idx19);
+	}
+
+	// Pipeline epilogue: finish the round started by the last CN2_STEP2.
+	CN2_STEP3(ax0, cx0, bx0, l0, mc0, ptr0, idx0);
+	CN2_STEP3(ax1, cx1, bx1, l1, mc1, ptr1, idx1);
+	CN2_STEP3(ax2, cx2, bx2, l2, mc2, ptr2, idx2);
+	CN2_STEP3(ax3, cx3, bx3, l3, mc3, ptr3, idx3);
+	CN2_STEP3(ax4, cx4, bx4, l4, mc4, ptr4, idx4);
+	CN2_STEP3(ax5, cx5, bx5, l5, mc5, ptr5, idx5);
+	CN2_STEP3(ax6, cx6, bx6, l6, mc6, ptr6, idx6);
+	CN2_STEP3(ax7, cx7, bx7, l7, mc7, ptr7, idx7);
+	CN2_STEP3(ax8, cx8, bx8, l8, mc8, ptr8, idx8);
+	CN2_STEP3(ax9, cx9, bx9, l9, mc9, ptr9, idx9);
+	CN2_STEP3(ax10, cx10, bx10, l10, mc10, ptr10, idx10);
+	CN2_STEP3(ax11, cx11, bx11, l11, mc11, ptr11, idx11);
+	CN2_STEP3(ax12, cx12, bx12, l12, mc12, ptr12, idx12);
+	CN2_STEP3(ax13, cx13, bx13, l13, mc13, ptr13, idx13);
+	CN2_STEP3(ax14, cx14, bx14, l14, mc14, ptr14, idx14);
+	CN2_STEP3(ax15, cx15, bx15, l15, mc15, ptr15, idx15);
+	CN2_STEP3(ax16, cx16, bx16, l16, mc16, ptr16, idx16);
+	CN2_STEP3(ax17, cx17, bx17, l17, mc17, ptr17, idx17);
+	CN2_STEP3(ax18, cx18, bx18, l18, mc18, ptr18, idx18);
+	CN2_STEP3(ax19, cx19, bx19, l19, mc19, ptr19, idx19);
+
+	// Per-lane finalisation; lane i's result goes to output + 32 * i.
+	for (size_t i = 0; i < 20; i++)
+	{
+		cn_implode_scratchpad((__m128i*)ctx[i]->long_state, (__m128i*)ctx[i]->hash_state);
+		keccakf((uint64_t*)ctx[i]->hash_state, 24);
+		extra_hashes[ctx[i]->hash_state[0] & 3](ctx[i]->hash_state, 200, (char*)output + 32 * i);
+	}
+}
diff 
--git a/xmrstak/backend/cpu/minethd.cpp b/xmrstak/backend/cpu/minethd.cpp index 482c085e0..2504d88e7 100644 --- a/xmrstak/backend/cpu/minethd.cpp +++ b/xmrstak/backend/cpu/minethd.cpp @@ -119,6 +119,9 @@ minethd::minethd(miner_work& pWork, size_t iNo, int iMultiway, bool no_prefetch, switch (iMultiway) { + case 6: + oWorkThd = std::thread(&minethd::twenty_work_main, this); + break; case 5: oWorkThd = std::thread(&minethd::penta_work_main, this); break; @@ -181,7 +184,7 @@ cryptonight_ctx* minethd::minethd_alloc_ctx() return nullptr; //Should never happen } -static constexpr size_t MAX_N = 5; +static constexpr size_t MAX_N = 20; bool minethd::self_test() { alloc_msg msg = { 0 }; @@ -276,12 +279,252 @@ bool minethd::self_test() "\xa0\x84\xf0\x1d\x14\x37\xa0\x9c\x69\x85\x40\x1b\x60\xd4\x35\x54\xae\x10\x58\x02\xc5\xf5\xd8\xa9\xb3\x25\x36\x49\xc0\xbe\x66\x05" "\xa0\x84\xf0\x1d\x14\x37\xa0\x9c\x69\x85\x40\x1b\x60\xd4\x35\x54\xae\x10\x58\x02\xc5\xf5\xd8\xa9\xb3\x25\x36\x49\xc0\xbe\x66\x05" "\xa0\x84\xf0\x1d\x14\x37\xa0\x9c\x69\x85\x40\x1b\x60\xd4\x35\x54\xae\x10\x58\x02\xc5\xf5\xd8\xa9\xb3\x25\x36\x49\xc0\xbe\x66\x05", 160) == 0; + + hashf_multi = func_multi_selector(6, ::jconf::inst()->HaveHardwareAes(), false, xmrstak_algo::cryptonight); + hashf_multi("This is a testThis is a testThis is a testThis is a testThis is a testThis is a testThis is a testThis is a testThis is a testThis is a testThis is a testThis is a testThis is a testThis is a testThis is a testThis is a testThis is a testThis is a testThis is a testThis is a test", 14, out, ctx); + bResult &= memcmp(out, + "\xa0\x84\xf0\x1d\x14\x37\xa0\x9c\x69\x85\x40\x1b\x60\xd4\x35\x54\xae\x10\x58\x02\xc5\xf5\xd8\xa9\xb3\x25\x36\x49\xc0\xbe\x66\x05" + "\xa0\x84\xf0\x1d\x14\x37\xa0\x9c\x69\x85\x40\x1b\x60\xd4\x35\x54\xae\x10\x58\x02\xc5\xf5\xd8\xa9\xb3\x25\x36\x49\xc0\xbe\x66\x05" + "\xa0\x84\xf0\x1d\x14\x37\xa0\x9c\x69\x85\x40\x1b\x60\xd4\x35\x54\xae\x10\x58\x02\xc5\xf5\xd8\xa9\xb3\x25\x36\x49\xc0\xbe\x66\x05" + 
"\xa0\x84\xf0\x1d\x14\x37\xa0\x9c\x69\x85\x40\x1b\x60\xd4\x35\x54\xae\x10\x58\x02\xc5\xf5\xd8\xa9\xb3\x25\x36\x49\xc0\xbe\x66\x05" + "\xa0\x84\xf0\x1d\x14\x37\xa0\x9c\x69\x85\x40\x1b\x60\xd4\x35\x54\xae\x10\x58\x02\xc5\xf5\xd8\xa9\xb3\x25\x36\x49\xc0\xbe\x66\x05" + "\xa0\x84\xf0\x1d\x14\x37\xa0\x9c\x69\x85\x40\x1b\x60\xd4\x35\x54\xae\x10\x58\x02\xc5\xf5\xd8\xa9\xb3\x25\x36\x49\xc0\xbe\x66\x05" + "\xa0\x84\xf0\x1d\x14\x37\xa0\x9c\x69\x85\x40\x1b\x60\xd4\x35\x54\xae\x10\x58\x02\xc5\xf5\xd8\xa9\xb3\x25\x36\x49\xc0\xbe\x66\x05" + "\xa0\x84\xf0\x1d\x14\x37\xa0\x9c\x69\x85\x40\x1b\x60\xd4\x35\x54\xae\x10\x58\x02\xc5\xf5\xd8\xa9\xb3\x25\x36\x49\xc0\xbe\x66\x05" + "\xa0\x84\xf0\x1d\x14\x37\xa0\x9c\x69\x85\x40\x1b\x60\xd4\x35\x54\xae\x10\x58\x02\xc5\xf5\xd8\xa9\xb3\x25\x36\x49\xc0\xbe\x66\x05" + "\xa0\x84\xf0\x1d\x14\x37\xa0\x9c\x69\x85\x40\x1b\x60\xd4\x35\x54\xae\x10\x58\x02\xc5\xf5\xd8\xa9\xb3\x25\x36\x49\xc0\xbe\x66\x05" + "\xa0\x84\xf0\x1d\x14\x37\xa0\x9c\x69\x85\x40\x1b\x60\xd4\x35\x54\xae\x10\x58\x02\xc5\xf5\xd8\xa9\xb3\x25\x36\x49\xc0\xbe\x66\x05" + "\xa0\x84\xf0\x1d\x14\x37\xa0\x9c\x69\x85\x40\x1b\x60\xd4\x35\x54\xae\x10\x58\x02\xc5\xf5\xd8\xa9\xb3\x25\x36\x49\xc0\xbe\x66\x05" + "\xa0\x84\xf0\x1d\x14\x37\xa0\x9c\x69\x85\x40\x1b\x60\xd4\x35\x54\xae\x10\x58\x02\xc5\xf5\xd8\xa9\xb3\x25\x36\x49\xc0\xbe\x66\x05" + "\xa0\x84\xf0\x1d\x14\x37\xa0\x9c\x69\x85\x40\x1b\x60\xd4\x35\x54\xae\x10\x58\x02\xc5\xf5\xd8\xa9\xb3\x25\x36\x49\xc0\xbe\x66\x05" + "\xa0\x84\xf0\x1d\x14\x37\xa0\x9c\x69\x85\x40\x1b\x60\xd4\x35\x54\xae\x10\x58\x02\xc5\xf5\xd8\xa9\xb3\x25\x36\x49\xc0\xbe\x66\x05" + "\xa0\x84\xf0\x1d\x14\x37\xa0\x9c\x69\x85\x40\x1b\x60\xd4\x35\x54\xae\x10\x58\x02\xc5\xf5\xd8\xa9\xb3\x25\x36\x49\xc0\xbe\x66\x05" + "\xa0\x84\xf0\x1d\x14\x37\xa0\x9c\x69\x85\x40\x1b\x60\xd4\x35\x54\xae\x10\x58\x02\xc5\xf5\xd8\xa9\xb3\x25\x36\x49\xc0\xbe\x66\x05" + "\xa0\x84\xf0\x1d\x14\x37\xa0\x9c\x69\x85\x40\x1b\x60\xd4\x35\x54\xae\x10\x58\x02\xc5\xf5\xd8\xa9\xb3\x25\x36\x49\xc0\xbe\x66\x05" + 
"\xa0\x84\xf0\x1d\x14\x37\xa0\x9c\x69\x85\x40\x1b\x60\xd4\x35\x54\xae\x10\x58\x02\xc5\xf5\xd8\xa9\xb3\x25\x36\x49\xc0\xbe\x66\x05" + "\xa0\x84\xf0\x1d\x14\x37\xa0\x9c\x69\x85\x40\x1b\x60\xd4\x35\x54\xae\x10\x58\x02\xc5\xf5\xd8\xa9\xb3\x25\x36\x49\xc0\xbe\x66\x05" + , 640) == 0; } else if(::jconf::inst()->GetCurrentCoinSelection().GetDescription(1).GetMiningAlgo() == cryptonight_lite) { } else if(::jconf::inst()->GetCurrentCoinSelection().GetDescription(1).GetMiningAlgo() == cryptonight_monero) { + unsigned char out[32 * MAX_N]; + cn_hash_fun hashf; + cn_hash_fun_multi hashf_multi; + + hashf = func_selector(::jconf::inst()->HaveHardwareAes(), false, xmrstak_algo::cryptonight_monero); + hashf( + "The quick brown fox jumps over the lazy dog" + , 43, out, ctx[0]); + bResult = memcmp(out, + "\x94\xf5\xde\xc5\x24\xfa\xd6\xd3\x20\x04\xc5\x5c\x03\x5e\x5e\xa2\x23\xe7\x31\x5b\xe2\x0e\x2d\xc5\xb8\xa0\xac\x74\x64\xff\xeb\x1f" + , 32) == 0; + + hashf = func_selector(::jconf::inst()->HaveHardwareAes(), true, xmrstak_algo::cryptonight_monero); + hashf( + "The quick brown fox jumps over the lazy dog" + , 43, out, ctx[0]); + bResult &= memcmp(out, + "\x94\xf5\xde\xc5\x24\xfa\xd6\xd3\x20\x04\xc5\x5c\x03\x5e\x5e\xa2\x23\xe7\x31\x5b\xe2\x0e\x2d\xc5\xb8\xa0\xac\x74\x64\xff\xeb\x1f" + , 32) == 0; + + hashf_multi = func_multi_selector(2, ::jconf::inst()->HaveHardwareAes(), false, xmrstak_algo::cryptonight_monero); + hashf_multi( + "The quick brown fox jumps over the lazy dog" + "The quick brown fox jumps over the lazy dog" + , 43, out, ctx); + bResult &= memcmp(out, + "\x94\xf5\xde\xc5\x24\xfa\xd6\xd3\x20\x04\xc5\x5c\x03\x5e\x5e\xa2\x23\xe7\x31\x5b\xe2\x0e\x2d\xc5\xb8\xa0\xac\x74\x64\xff\xeb\x1f" + "\x94\xf5\xde\xc5\x24\xfa\xd6\xd3\x20\x04\xc5\x5c\x03\x5e\x5e\xa2\x23\xe7\x31\x5b\xe2\x0e\x2d\xc5\xb8\xa0\xac\x74\x64\xff\xeb\x1f" + , 64) == 0; + + hashf_multi = func_multi_selector(2, ::jconf::inst()->HaveHardwareAes(), true, xmrstak_algo::cryptonight_monero); + hashf_multi( + "The quick 
brown fox jumps over the lazy dog" + "The quick brown fox jumps over the lazy dog" + , 43, out, ctx); + bResult &= memcmp(out, + "\x94\xf5\xde\xc5\x24\xfa\xd6\xd3\x20\x04\xc5\x5c\x03\x5e\x5e\xa2\x23\xe7\x31\x5b\xe2\x0e\x2d\xc5\xb8\xa0\xac\x74\x64\xff\xeb\x1f" + "\x94\xf5\xde\xc5\x24\xfa\xd6\xd3\x20\x04\xc5\x5c\x03\x5e\x5e\xa2\x23\xe7\x31\x5b\xe2\x0e\x2d\xc5\xb8\xa0\xac\x74\x64\xff\xeb\x1f" + , 64) == 0; + + hashf_multi = func_multi_selector(3, ::jconf::inst()->HaveHardwareAes(), false, xmrstak_algo::cryptonight_monero); + hashf_multi( + "The quick brown fox jumps over the lazy dog" + "The quick brown fox jumps over the lazy dog" + "The quick brown fox jumps over the lazy dog" + , 43, out, ctx); + bResult &= memcmp(out, + "\x94\xf5\xde\xc5\x24\xfa\xd6\xd3\x20\x04\xc5\x5c\x03\x5e\x5e\xa2\x23\xe7\x31\x5b\xe2\x0e\x2d\xc5\xb8\xa0\xac\x74\x64\xff\xeb\x1f" + "\x94\xf5\xde\xc5\x24\xfa\xd6\xd3\x20\x04\xc5\x5c\x03\x5e\x5e\xa2\x23\xe7\x31\x5b\xe2\x0e\x2d\xc5\xb8\xa0\xac\x74\x64\xff\xeb\x1f" + "\x94\xf5\xde\xc5\x24\xfa\xd6\xd3\x20\x04\xc5\x5c\x03\x5e\x5e\xa2\x23\xe7\x31\x5b\xe2\x0e\x2d\xc5\xb8\xa0\xac\x74\x64\xff\xeb\x1f" + , 96) == 0; + + hashf_multi = func_multi_selector(3, ::jconf::inst()->HaveHardwareAes(), true, xmrstak_algo::cryptonight_monero); + hashf_multi( + "The quick brown fox jumps over the lazy dog" + "The quick brown fox jumps over the lazy dog" + "The quick brown fox jumps over the lazy dog" + , 43, out, ctx); + bResult &= memcmp(out, + "\x94\xf5\xde\xc5\x24\xfa\xd6\xd3\x20\x04\xc5\x5c\x03\x5e\x5e\xa2\x23\xe7\x31\x5b\xe2\x0e\x2d\xc5\xb8\xa0\xac\x74\x64\xff\xeb\x1f" + "\x94\xf5\xde\xc5\x24\xfa\xd6\xd3\x20\x04\xc5\x5c\x03\x5e\x5e\xa2\x23\xe7\x31\x5b\xe2\x0e\x2d\xc5\xb8\xa0\xac\x74\x64\xff\xeb\x1f" + "\x94\xf5\xde\xc5\x24\xfa\xd6\xd3\x20\x04\xc5\x5c\x03\x5e\x5e\xa2\x23\xe7\x31\x5b\xe2\x0e\x2d\xc5\xb8\xa0\xac\x74\x64\xff\xeb\x1f" + , 96) == 0; + + hashf_multi = func_multi_selector(4, ::jconf::inst()->HaveHardwareAes(), false, xmrstak_algo::cryptonight_monero); + 
hashf_multi( + "The quick brown fox jumps over the lazy dog" + "The quick brown fox jumps over the lazy dog" + "The quick brown fox jumps over the lazy dog" + "The quick brown fox jumps over the lazy dog" + , 43, out, ctx); + bResult &= memcmp(out, + "\x94\xf5\xde\xc5\x24\xfa\xd6\xd3\x20\x04\xc5\x5c\x03\x5e\x5e\xa2\x23\xe7\x31\x5b\xe2\x0e\x2d\xc5\xb8\xa0\xac\x74\x64\xff\xeb\x1f" + "\x94\xf5\xde\xc5\x24\xfa\xd6\xd3\x20\x04\xc5\x5c\x03\x5e\x5e\xa2\x23\xe7\x31\x5b\xe2\x0e\x2d\xc5\xb8\xa0\xac\x74\x64\xff\xeb\x1f" + "\x94\xf5\xde\xc5\x24\xfa\xd6\xd3\x20\x04\xc5\x5c\x03\x5e\x5e\xa2\x23\xe7\x31\x5b\xe2\x0e\x2d\xc5\xb8\xa0\xac\x74\x64\xff\xeb\x1f" + "\x94\xf5\xde\xc5\x24\xfa\xd6\xd3\x20\x04\xc5\x5c\x03\x5e\x5e\xa2\x23\xe7\x31\x5b\xe2\x0e\x2d\xc5\xb8\xa0\xac\x74\x64\xff\xeb\x1f" + , 128) == 0; + + hashf_multi = func_multi_selector(4, ::jconf::inst()->HaveHardwareAes(), true, xmrstak_algo::cryptonight_monero); + hashf_multi( + "The quick brown fox jumps over the lazy dog" + "The quick brown fox jumps over the lazy dog" + "The quick brown fox jumps over the lazy dog" + "The quick brown fox jumps over the lazy dog" + , 43, out, ctx); + bResult &= memcmp(out, + "\x94\xf5\xde\xc5\x24\xfa\xd6\xd3\x20\x04\xc5\x5c\x03\x5e\x5e\xa2\x23\xe7\x31\x5b\xe2\x0e\x2d\xc5\xb8\xa0\xac\x74\x64\xff\xeb\x1f" + "\x94\xf5\xde\xc5\x24\xfa\xd6\xd3\x20\x04\xc5\x5c\x03\x5e\x5e\xa2\x23\xe7\x31\x5b\xe2\x0e\x2d\xc5\xb8\xa0\xac\x74\x64\xff\xeb\x1f" + "\x94\xf5\xde\xc5\x24\xfa\xd6\xd3\x20\x04\xc5\x5c\x03\x5e\x5e\xa2\x23\xe7\x31\x5b\xe2\x0e\x2d\xc5\xb8\xa0\xac\x74\x64\xff\xeb\x1f" + "\x94\xf5\xde\xc5\x24\xfa\xd6\xd3\x20\x04\xc5\x5c\x03\x5e\x5e\xa2\x23\xe7\x31\x5b\xe2\x0e\x2d\xc5\xb8\xa0\xac\x74\x64\xff\xeb\x1f" + , 128) == 0; + + hashf_multi = func_multi_selector(5, ::jconf::inst()->HaveHardwareAes(), false, xmrstak_algo::cryptonight_monero); + hashf_multi( + "The quick brown fox jumps over the lazy dog" + "The quick brown fox jumps over the lazy dog" + "The quick brown fox jumps over the lazy dog" + "The 
quick brown fox jumps over the lazy dog" + "The quick brown fox jumps over the lazy dog" + , 43, out, ctx); + bResult &= memcmp(out, + "\x94\xf5\xde\xc5\x24\xfa\xd6\xd3\x20\x04\xc5\x5c\x03\x5e\x5e\xa2\x23\xe7\x31\x5b\xe2\x0e\x2d\xc5\xb8\xa0\xac\x74\x64\xff\xeb\x1f" + "\x94\xf5\xde\xc5\x24\xfa\xd6\xd3\x20\x04\xc5\x5c\x03\x5e\x5e\xa2\x23\xe7\x31\x5b\xe2\x0e\x2d\xc5\xb8\xa0\xac\x74\x64\xff\xeb\x1f" + "\x94\xf5\xde\xc5\x24\xfa\xd6\xd3\x20\x04\xc5\x5c\x03\x5e\x5e\xa2\x23\xe7\x31\x5b\xe2\x0e\x2d\xc5\xb8\xa0\xac\x74\x64\xff\xeb\x1f" + "\x94\xf5\xde\xc5\x24\xfa\xd6\xd3\x20\x04\xc5\x5c\x03\x5e\x5e\xa2\x23\xe7\x31\x5b\xe2\x0e\x2d\xc5\xb8\xa0\xac\x74\x64\xff\xeb\x1f" + "\x94\xf5\xde\xc5\x24\xfa\xd6\xd3\x20\x04\xc5\x5c\x03\x5e\x5e\xa2\x23\xe7\x31\x5b\xe2\x0e\x2d\xc5\xb8\xa0\xac\x74\x64\xff\xeb\x1f" + , 160) == 0; + + hashf_multi = func_multi_selector(5, ::jconf::inst()->HaveHardwareAes(), true, xmrstak_algo::cryptonight_monero); + hashf_multi( + "The quick brown fox jumps over the lazy dog" + "The quick brown fox jumps over the lazy dog" + "The quick brown fox jumps over the lazy dog" + "The quick brown fox jumps over the lazy dog" + "The quick brown fox jumps over the lazy dog" + , 43, out, ctx); + bResult &= memcmp(out, + "\x94\xf5\xde\xc5\x24\xfa\xd6\xd3\x20\x04\xc5\x5c\x03\x5e\x5e\xa2\x23\xe7\x31\x5b\xe2\x0e\x2d\xc5\xb8\xa0\xac\x74\x64\xff\xeb\x1f" + "\x94\xf5\xde\xc5\x24\xfa\xd6\xd3\x20\x04\xc5\x5c\x03\x5e\x5e\xa2\x23\xe7\x31\x5b\xe2\x0e\x2d\xc5\xb8\xa0\xac\x74\x64\xff\xeb\x1f" + "\x94\xf5\xde\xc5\x24\xfa\xd6\xd3\x20\x04\xc5\x5c\x03\x5e\x5e\xa2\x23\xe7\x31\x5b\xe2\x0e\x2d\xc5\xb8\xa0\xac\x74\x64\xff\xeb\x1f" + "\x94\xf5\xde\xc5\x24\xfa\xd6\xd3\x20\x04\xc5\x5c\x03\x5e\x5e\xa2\x23\xe7\x31\x5b\xe2\x0e\x2d\xc5\xb8\xa0\xac\x74\x64\xff\xeb\x1f" + "\x94\xf5\xde\xc5\x24\xfa\xd6\xd3\x20\x04\xc5\x5c\x03\x5e\x5e\xa2\x23\xe7\x31\x5b\xe2\x0e\x2d\xc5\xb8\xa0\xac\x74\x64\xff\xeb\x1f" + , 160) == 0; + + hashf_multi = func_multi_selector(6, ::jconf::inst()->HaveHardwareAes(), false, 
xmrstak_algo::cryptonight_monero); + hashf_multi( + "The quick brown fox jumps over the lazy dog" + "The quick brown fox jumps over the lazy dog" + "The quick brown fox jumps over the lazy dog" + "The quick brown fox jumps over the lazy dog" + "The quick brown fox jumps over the lazy dog" + "The quick brown fox jumps over the lazy dog" + "The quick brown fox jumps over the lazy dog" + "The quick brown fox jumps over the lazy dog" + "The quick brown fox jumps over the lazy dog" + "The quick brown fox jumps over the lazy dog" + "The quick brown fox jumps over the lazy dog" + "The quick brown fox jumps over the lazy dog" + "The quick brown fox jumps over the lazy dog" + "The quick brown fox jumps over the lazy dog" + "The quick brown fox jumps over the lazy dog" + "The quick brown fox jumps over the lazy dog" + "The quick brown fox jumps over the lazy dog" + "The quick brown fox jumps over the lazy dog" + "The quick brown fox jumps over the lazy dog" + "The quick brown fox jumps over the lazy dog" + , 43, out, ctx); + bResult &= memcmp(out, + "\x94\xf5\xde\xc5\x24\xfa\xd6\xd3\x20\x04\xc5\x5c\x03\x5e\x5e\xa2\x23\xe7\x31\x5b\xe2\x0e\x2d\xc5\xb8\xa0\xac\x74\x64\xff\xeb\x1f" + "\x94\xf5\xde\xc5\x24\xfa\xd6\xd3\x20\x04\xc5\x5c\x03\x5e\x5e\xa2\x23\xe7\x31\x5b\xe2\x0e\x2d\xc5\xb8\xa0\xac\x74\x64\xff\xeb\x1f" + "\x94\xf5\xde\xc5\x24\xfa\xd6\xd3\x20\x04\xc5\x5c\x03\x5e\x5e\xa2\x23\xe7\x31\x5b\xe2\x0e\x2d\xc5\xb8\xa0\xac\x74\x64\xff\xeb\x1f" + "\x94\xf5\xde\xc5\x24\xfa\xd6\xd3\x20\x04\xc5\x5c\x03\x5e\x5e\xa2\x23\xe7\x31\x5b\xe2\x0e\x2d\xc5\xb8\xa0\xac\x74\x64\xff\xeb\x1f" + "\x94\xf5\xde\xc5\x24\xfa\xd6\xd3\x20\x04\xc5\x5c\x03\x5e\x5e\xa2\x23\xe7\x31\x5b\xe2\x0e\x2d\xc5\xb8\xa0\xac\x74\x64\xff\xeb\x1f" + "\x94\xf5\xde\xc5\x24\xfa\xd6\xd3\x20\x04\xc5\x5c\x03\x5e\x5e\xa2\x23\xe7\x31\x5b\xe2\x0e\x2d\xc5\xb8\xa0\xac\x74\x64\xff\xeb\x1f" + "\x94\xf5\xde\xc5\x24\xfa\xd6\xd3\x20\x04\xc5\x5c\x03\x5e\x5e\xa2\x23\xe7\x31\x5b\xe2\x0e\x2d\xc5\xb8\xa0\xac\x74\x64\xff\xeb\x1f" + 
"\x94\xf5\xde\xc5\x24\xfa\xd6\xd3\x20\x04\xc5\x5c\x03\x5e\x5e\xa2\x23\xe7\x31\x5b\xe2\x0e\x2d\xc5\xb8\xa0\xac\x74\x64\xff\xeb\x1f" + "\x94\xf5\xde\xc5\x24\xfa\xd6\xd3\x20\x04\xc5\x5c\x03\x5e\x5e\xa2\x23\xe7\x31\x5b\xe2\x0e\x2d\xc5\xb8\xa0\xac\x74\x64\xff\xeb\x1f" + "\x94\xf5\xde\xc5\x24\xfa\xd6\xd3\x20\x04\xc5\x5c\x03\x5e\x5e\xa2\x23\xe7\x31\x5b\xe2\x0e\x2d\xc5\xb8\xa0\xac\x74\x64\xff\xeb\x1f" + "\x94\xf5\xde\xc5\x24\xfa\xd6\xd3\x20\x04\xc5\x5c\x03\x5e\x5e\xa2\x23\xe7\x31\x5b\xe2\x0e\x2d\xc5\xb8\xa0\xac\x74\x64\xff\xeb\x1f" + "\x94\xf5\xde\xc5\x24\xfa\xd6\xd3\x20\x04\xc5\x5c\x03\x5e\x5e\xa2\x23\xe7\x31\x5b\xe2\x0e\x2d\xc5\xb8\xa0\xac\x74\x64\xff\xeb\x1f" + "\x94\xf5\xde\xc5\x24\xfa\xd6\xd3\x20\x04\xc5\x5c\x03\x5e\x5e\xa2\x23\xe7\x31\x5b\xe2\x0e\x2d\xc5\xb8\xa0\xac\x74\x64\xff\xeb\x1f" + "\x94\xf5\xde\xc5\x24\xfa\xd6\xd3\x20\x04\xc5\x5c\x03\x5e\x5e\xa2\x23\xe7\x31\x5b\xe2\x0e\x2d\xc5\xb8\xa0\xac\x74\x64\xff\xeb\x1f" + "\x94\xf5\xde\xc5\x24\xfa\xd6\xd3\x20\x04\xc5\x5c\x03\x5e\x5e\xa2\x23\xe7\x31\x5b\xe2\x0e\x2d\xc5\xb8\xa0\xac\x74\x64\xff\xeb\x1f" + "\x94\xf5\xde\xc5\x24\xfa\xd6\xd3\x20\x04\xc5\x5c\x03\x5e\x5e\xa2\x23\xe7\x31\x5b\xe2\x0e\x2d\xc5\xb8\xa0\xac\x74\x64\xff\xeb\x1f" + "\x94\xf5\xde\xc5\x24\xfa\xd6\xd3\x20\x04\xc5\x5c\x03\x5e\x5e\xa2\x23\xe7\x31\x5b\xe2\x0e\x2d\xc5\xb8\xa0\xac\x74\x64\xff\xeb\x1f" + "\x94\xf5\xde\xc5\x24\xfa\xd6\xd3\x20\x04\xc5\x5c\x03\x5e\x5e\xa2\x23\xe7\x31\x5b\xe2\x0e\x2d\xc5\xb8\xa0\xac\x74\x64\xff\xeb\x1f" + "\x94\xf5\xde\xc5\x24\xfa\xd6\xd3\x20\x04\xc5\x5c\x03\x5e\x5e\xa2\x23\xe7\x31\x5b\xe2\x0e\x2d\xc5\xb8\xa0\xac\x74\x64\xff\xeb\x1f" + "\x94\xf5\xde\xc5\x24\xfa\xd6\xd3\x20\x04\xc5\x5c\x03\x5e\x5e\xa2\x23\xe7\x31\x5b\xe2\x0e\x2d\xc5\xb8\xa0\xac\x74\x64\xff\xeb\x1f" + , 640) == 0; + + hashf_multi = func_multi_selector(6, ::jconf::inst()->HaveHardwareAes(), true, xmrstak_algo::cryptonight_monero); + hashf_multi( + "The quick brown fox jumps over the lazy dog" + "The quick brown fox jumps over the lazy dog" + "The quick brown fox jumps 
over the lazy dog" + "The quick brown fox jumps over the lazy dog" + "The quick brown fox jumps over the lazy dog" + "The quick brown fox jumps over the lazy dog" + "The quick brown fox jumps over the lazy dog" + "The quick brown fox jumps over the lazy dog" + "The quick brown fox jumps over the lazy dog" + "The quick brown fox jumps over the lazy dog" + "The quick brown fox jumps over the lazy dog" + "The quick brown fox jumps over the lazy dog" + "The quick brown fox jumps over the lazy dog" + "The quick brown fox jumps over the lazy dog" + "The quick brown fox jumps over the lazy dog" + "The quick brown fox jumps over the lazy dog" + "The quick brown fox jumps over the lazy dog" + "The quick brown fox jumps over the lazy dog" + "The quick brown fox jumps over the lazy dog" + "The quick brown fox jumps over the lazy dog" + , 43, out, ctx); + bResult &= memcmp(out, + "\x94\xf5\xde\xc5\x24\xfa\xd6\xd3\x20\x04\xc5\x5c\x03\x5e\x5e\xa2\x23\xe7\x31\x5b\xe2\x0e\x2d\xc5\xb8\xa0\xac\x74\x64\xff\xeb\x1f" + "\x94\xf5\xde\xc5\x24\xfa\xd6\xd3\x20\x04\xc5\x5c\x03\x5e\x5e\xa2\x23\xe7\x31\x5b\xe2\x0e\x2d\xc5\xb8\xa0\xac\x74\x64\xff\xeb\x1f" + "\x94\xf5\xde\xc5\x24\xfa\xd6\xd3\x20\x04\xc5\x5c\x03\x5e\x5e\xa2\x23\xe7\x31\x5b\xe2\x0e\x2d\xc5\xb8\xa0\xac\x74\x64\xff\xeb\x1f" + "\x94\xf5\xde\xc5\x24\xfa\xd6\xd3\x20\x04\xc5\x5c\x03\x5e\x5e\xa2\x23\xe7\x31\x5b\xe2\x0e\x2d\xc5\xb8\xa0\xac\x74\x64\xff\xeb\x1f" + "\x94\xf5\xde\xc5\x24\xfa\xd6\xd3\x20\x04\xc5\x5c\x03\x5e\x5e\xa2\x23\xe7\x31\x5b\xe2\x0e\x2d\xc5\xb8\xa0\xac\x74\x64\xff\xeb\x1f" + "\x94\xf5\xde\xc5\x24\xfa\xd6\xd3\x20\x04\xc5\x5c\x03\x5e\x5e\xa2\x23\xe7\x31\x5b\xe2\x0e\x2d\xc5\xb8\xa0\xac\x74\x64\xff\xeb\x1f" + "\x94\xf5\xde\xc5\x24\xfa\xd6\xd3\x20\x04\xc5\x5c\x03\x5e\x5e\xa2\x23\xe7\x31\x5b\xe2\x0e\x2d\xc5\xb8\xa0\xac\x74\x64\xff\xeb\x1f" + "\x94\xf5\xde\xc5\x24\xfa\xd6\xd3\x20\x04\xc5\x5c\x03\x5e\x5e\xa2\x23\xe7\x31\x5b\xe2\x0e\x2d\xc5\xb8\xa0\xac\x74\x64\xff\xeb\x1f" + 
"\x94\xf5\xde\xc5\x24\xfa\xd6\xd3\x20\x04\xc5\x5c\x03\x5e\x5e\xa2\x23\xe7\x31\x5b\xe2\x0e\x2d\xc5\xb8\xa0\xac\x74\x64\xff\xeb\x1f" + "\x94\xf5\xde\xc5\x24\xfa\xd6\xd3\x20\x04\xc5\x5c\x03\x5e\x5e\xa2\x23\xe7\x31\x5b\xe2\x0e\x2d\xc5\xb8\xa0\xac\x74\x64\xff\xeb\x1f" + "\x94\xf5\xde\xc5\x24\xfa\xd6\xd3\x20\x04\xc5\x5c\x03\x5e\x5e\xa2\x23\xe7\x31\x5b\xe2\x0e\x2d\xc5\xb8\xa0\xac\x74\x64\xff\xeb\x1f" + "\x94\xf5\xde\xc5\x24\xfa\xd6\xd3\x20\x04\xc5\x5c\x03\x5e\x5e\xa2\x23\xe7\x31\x5b\xe2\x0e\x2d\xc5\xb8\xa0\xac\x74\x64\xff\xeb\x1f" + "\x94\xf5\xde\xc5\x24\xfa\xd6\xd3\x20\x04\xc5\x5c\x03\x5e\x5e\xa2\x23\xe7\x31\x5b\xe2\x0e\x2d\xc5\xb8\xa0\xac\x74\x64\xff\xeb\x1f" + "\x94\xf5\xde\xc5\x24\xfa\xd6\xd3\x20\x04\xc5\x5c\x03\x5e\x5e\xa2\x23\xe7\x31\x5b\xe2\x0e\x2d\xc5\xb8\xa0\xac\x74\x64\xff\xeb\x1f" + "\x94\xf5\xde\xc5\x24\xfa\xd6\xd3\x20\x04\xc5\x5c\x03\x5e\x5e\xa2\x23\xe7\x31\x5b\xe2\x0e\x2d\xc5\xb8\xa0\xac\x74\x64\xff\xeb\x1f" + "\x94\xf5\xde\xc5\x24\xfa\xd6\xd3\x20\x04\xc5\x5c\x03\x5e\x5e\xa2\x23\xe7\x31\x5b\xe2\x0e\x2d\xc5\xb8\xa0\xac\x74\x64\xff\xeb\x1f" + "\x94\xf5\xde\xc5\x24\xfa\xd6\xd3\x20\x04\xc5\x5c\x03\x5e\x5e\xa2\x23\xe7\x31\x5b\xe2\x0e\x2d\xc5\xb8\xa0\xac\x74\x64\xff\xeb\x1f" + "\x94\xf5\xde\xc5\x24\xfa\xd6\xd3\x20\x04\xc5\x5c\x03\x5e\x5e\xa2\x23\xe7\x31\x5b\xe2\x0e\x2d\xc5\xb8\xa0\xac\x74\x64\xff\xeb\x1f" + "\x94\xf5\xde\xc5\x24\xfa\xd6\xd3\x20\x04\xc5\x5c\x03\x5e\x5e\xa2\x23\xe7\x31\x5b\xe2\x0e\x2d\xc5\xb8\xa0\xac\x74\x64\xff\xeb\x1f" + "\x94\xf5\xde\xc5\x24\xfa\xd6\xd3\x20\x04\xc5\x5c\x03\x5e\x5e\xa2\x23\xe7\x31\x5b\xe2\x0e\x2d\xc5\xb8\xa0\xac\x74\x64\xff\xeb\x1f" + , 640) == 0; } else if(::jconf::inst()->GetCurrentCoinSelection().GetDescription(1).GetMiningAlgo() == cryptonight_aeon) { @@ -531,6 +774,11 @@ minethd::cn_hash_fun_multi minethd::func_multi_selector(size_t N, bool bHaveAes, // therefore we will build a binary digit and select the // function as a two digit binary + size_t NN(N); + if(NN > 5) { + NN = 6; + } + uint8_t algv; switch(algo) { @@ -577,6 
+825,10 @@ minethd::cn_hash_fun_multi minethd::func_multi_selector(size_t N, bool bHaveAes, cryptonight_penta_hash, cryptonight_penta_hash, cryptonight_penta_hash, + cryptonight_twenty_hash, + cryptonight_twenty_hash, + cryptonight_twenty_hash, + cryptonight_twenty_hash, cryptonight_double_hash, cryptonight_double_hash, @@ -594,6 +846,10 @@ minethd::cn_hash_fun_multi minethd::func_multi_selector(size_t N, bool bHaveAes, cryptonight_penta_hash, cryptonight_penta_hash, cryptonight_penta_hash, + cryptonight_twenty_hash, + cryptonight_twenty_hash, + cryptonight_twenty_hash, + cryptonight_twenty_hash, cryptonight_double_hash, cryptonight_double_hash, @@ -611,6 +867,10 @@ minethd::cn_hash_fun_multi minethd::func_multi_selector(size_t N, bool bHaveAes, cryptonight_penta_hash, cryptonight_penta_hash, cryptonight_penta_hash, + cryptonight_twenty_hash, + cryptonight_twenty_hash, + cryptonight_twenty_hash, + cryptonight_twenty_hash, cryptonight_double_hash, cryptonight_double_hash, @@ -628,6 +888,10 @@ minethd::cn_hash_fun_multi minethd::func_multi_selector(size_t N, bool bHaveAes, cryptonight_penta_hash, cryptonight_penta_hash, cryptonight_penta_hash, + cryptonight_twenty_hash, + cryptonight_twenty_hash, + cryptonight_twenty_hash, + cryptonight_twenty_hash, cryptonight_double_hash, cryptonight_double_hash, @@ -645,7 +909,11 @@ minethd::cn_hash_fun_multi minethd::func_multi_selector(size_t N, bool bHaveAes, cryptonight_penta_hash, cryptonight_penta_hash, cryptonight_penta_hash, - + cryptonight_twenty_hash, + cryptonight_twenty_hash, + cryptonight_twenty_hash, + cryptonight_twenty_hash, + cryptonight_double_hash, cryptonight_double_hash, cryptonight_double_hash, @@ -662,6 +930,10 @@ minethd::cn_hash_fun_multi minethd::func_multi_selector(size_t N, bool bHaveAes, cryptonight_penta_hash, cryptonight_penta_hash, cryptonight_penta_hash, + cryptonight_twenty_hash, + cryptonight_twenty_hash, + cryptonight_twenty_hash, + cryptonight_twenty_hash, cryptonight_double_hash, 
cryptonight_double_hash, @@ -679,13 +951,17 @@ minethd::cn_hash_fun_multi minethd::func_multi_selector(size_t N, bool bHaveAes, cryptonight_penta_hash, cryptonight_penta_hash, cryptonight_penta_hash, + cryptonight_twenty_hash, + cryptonight_twenty_hash, + cryptonight_twenty_hash, + cryptonight_twenty_hash }; std::bitset<2> digit; digit.set(0, !bHaveAes); digit.set(1, !bNoPrefetch); - - return func_table[algv << 4 | (N-2) << 2 | digit.to_ulong()]; + + /* Table layout: every algorithm owns 20 consecutive entries, grouped in fours by hash width (NN-2) and indexed inside a group by the AES/prefetch digit. The three offsets must be ADDED: 20 is not a power of two, so OR-ing algv * 20 with the lower offsets overlaps bits and picks the wrong function (e.g. 20 | 4 == 20, not 24). */ return func_table[algv * 20 + ((NN - 2) << 2) + digit.to_ulong()]; } void minethd::double_work_main() @@ -708,6 +984,10 @@ void minethd::penta_work_main() multiway_work_main<5u>(); } +void minethd::twenty_work_main() { + multiway_work_main<20u>(); +} + template void minethd::prep_multiway_work(uint8_t *bWorkBlob, uint32_t **piNonce) { diff --git a/xmrstak/backend/cpu/minethd.hpp b/xmrstak/backend/cpu/minethd.hpp index 2d40ce314..8aee370ae 100644 --- a/xmrstak/backend/cpu/minethd.hpp +++ b/xmrstak/backend/cpu/minethd.hpp @@ -46,6 +46,7 @@ class minethd : public iBackend void triple_work_main(); void quad_work_main(); void penta_work_main(); + void twenty_work_main(); uint64_t iJobNo;