From ebf055d58595ba68142695805a09cf8cde98b9c1 Mon Sep 17 00:00:00 2001 From: Tanguy Pruvot Date: Fri, 1 Sep 2017 20:33:50 +0200 Subject: [PATCH] update c11 like tribus + 2.2.1 readme --- README.txt | 4 +- compat/ccminer-config.h | 376 ++++++++++++++++++++-------------------- tribus/tribus.cu | 1 + x11/c11.cu | 46 +++-- 4 files changed, 227 insertions(+), 200 deletions(-) diff --git a/README.txt b/README.txt index 0fa05f9b22..21e36cbee7 100644 --- a/README.txt +++ b/README.txt @@ -1,5 +1,5 @@ -ccminer 2.2 (August 2017) "Equihash, tribus and optimized skunk" +ccminer 2.2.1 (Sept. 2017) "optimized tribus kernel (Maxwell+)" --------------------------------------------------------------- *************************************************************** @@ -277,6 +277,8 @@ so we can more efficiently implement new algorithms using the latest hardware features. >>> RELEASE HISTORY <<< + Sep. 01st 2017 v2.2.1 + Improve tribus algo on recent cards (up to +10%) Aug. 13th 2017 v2.2 New skunk algo, using the heavy streebog algorithm diff --git a/compat/ccminer-config.h b/compat/ccminer-config.h index d07e736d21..375f51315b 100644 --- a/compat/ccminer-config.h +++ b/compat/ccminer-config.h @@ -1,188 +1,188 @@ -/* CONFIG ONLY FOR MS VC++ BUILD */ - -/* Define to one of `_getb67', `GETB67', `getb67' for Cray-2 and Cray-YMP - systems. This function is required for `alloca.c' support on those systems. - */ -/* #undef CRAY_STACKSEG_END */ - -/* Define to 1 if using `alloca.c'. */ -/* #undef C_ALLOCA */ - -/* Define to 1 if you have `alloca', as a function or macro. */ -#define HAVE_ALLOCA 1 - -/* Define to 1 if you have and it should be used (not on Ultrix). - */ -#define HAVE_ALLOCA_H 1 - -/* Define to 1 if you have the declaration of `be32dec', and to 0 if you - don't. */ -#define HAVE_DECL_BE32DEC 0 - -/* Define to 1 if you have the declaration of `be32enc', and to 0 if you - don't. */ -#define HAVE_DECL_BE32ENC 0 - -/* Define to 1 if you have the declaration of `le32dec', and to 0 if you - don't. */ -#define HAVE_DECL_LE32DEC 0 - -/* Define to 1 if you have the declaration of `le32enc', and to 0 if you - don't. */ -#define HAVE_DECL_LE32ENC 0 - -/* Define to 1 if you have the `getopt_long' function. */ -#define HAVE_GETOPT_LONG 1 - -/* Define to 1 if you have the header file. */ -#define HAVE_INTTYPES_H 1 - -/* Define to 1 if you have the `crypto' library (-lcrypto). */ -#define HAVE_LIBCRYPTO 1 - -/* Define to 1 if you have a functional curl library. */ -#define HAVE_LIBCURL 1 - -/* Define to 1 if you have the `ssl' library (-lssl). */ -#define HAVE_LIBSSL 1 - -/* Define to 1 if you have the `z' library (-lz). */ -#define HAVE_LIBZ 1 - -/* Define to 1 if you have the header file. */ -#define HAVE_MEMORY_H 1 - -/* Define to 1 if you have the header file. */ -#define HAVE_STDINT_H 1 - -/* Define to 1 if you have the header file. */ -#define HAVE_STDLIB_H 1 - -/* Define to 1 if you have the header file. */ -#define HAVE_STRINGS_H 1 - -/* Define to 1 if you have the header file. */ -#define HAVE_STRING_H 1 - -/* Define to 1 if you have the header file. */ -/* #undef HAVE_SYSLOG_H */ - -/* Define to 1 if you have the header file. */ -/* #undef HAVE_SYS_ENDIAN_H */ - -/* Define to 1 if you have the header file. */ -#define HAVE_SYS_PARAM_H 1 - -/* Define to 1 if you have the header file. */ -#define HAVE_SYS_STAT_H 1 - -/* Define to 1 if you have the header file. */ -#define HAVE_SYS_SYSCTL_H 1 - -/* Define to 1 if you have the header file. */ -#define HAVE_SYS_TYPES_H 1 - -/* Define to 1 if you have the header file. */ -#define HAVE_UNISTD_H 1 - -/* Defined if libcurl supports AsynchDNS */ -/* #undef LIBCURL_FEATURE_ASYNCHDNS */ - -/* Defined if libcurl supports IDN */ -#define LIBCURL_FEATURE_IDN 1 - -/* Defined if libcurl supports IPv6 */ -#define LIBCURL_FEATURE_IPV6 1 - -/* Defined if libcurl supports KRB4 */ -/* #undef LIBCURL_FEATURE_KRB4 */ - -/* Defined if libcurl supports libz */ -#define LIBCURL_FEATURE_LIBZ 1 - -/* Defined if libcurl supports NTLM */ -#define LIBCURL_FEATURE_NTLM 1 - -/* Defined if libcurl supports SSL */ -#define LIBCURL_FEATURE_SSL 1 - -/* Defined if libcurl supports SSPI */ -/* #undef LIBCURL_FEATURE_SSPI */ - -/* Defined if libcurl supports DICT */ -/* #undef LIBCURL_PROTOCOL_DICT */ - -/* Defined if libcurl supports FILE */ -#define LIBCURL_PROTOCOL_FILE 1 - -/* Defined if libcurl supports FTP */ -#define LIBCURL_PROTOCOL_FTP 1 - -/* Defined if libcurl supports FTPS */ -#define LIBCURL_PROTOCOL_FTPS 1 - -/* Defined if libcurl supports HTTP */ -#define LIBCURL_PROTOCOL_HTTP 1 - -/* Defined if libcurl supports HTTPS */ -#define LIBCURL_PROTOCOL_HTTPS 1 - -/* Defined if libcurl supports IMAP */ -/* #undef LIBCURL_PROTOCOL_IMAP */ - -/* Defined if libcurl supports LDAP */ -/* #undef LIBCURL_PROTOCOL_LDAP */ - -/* Defined if libcurl supports POP3 */ -/* #undef LIBCURL_PROTOCOL_POP3 */ - -/* Defined if libcurl supports RTSP */ -/* #undef LIBCURL_PROTOCOL_RTSP */ - -/* Defined if libcurl supports SMTP */ -/* #undef LIBCURL_PROTOCOL_SMTP */ - -/* Defined if libcurl supports TELNET */ -/* #undef LIBCURL_PROTOCOL_TELNET */ - -/* Defined if libcurl supports TFTP */ -/* #undef LIBCURL_PROTOCOL_TFTP */ - -/* Define to 1 if your C compiler doesn't accept -c and -o together. */ -/* #undef NO_MINUS_C_MINUS_O */ - -/* Name of package */ -#define PACKAGE "ccminer" - -/* Define to the address where bug reports for this package should be sent. */ -#define PACKAGE_BUGREPORT "" - -/* Define to the full name of this package. */ -#define PACKAGE_NAME "ccminer" - -/* Define to the home page for this package. */ -#define PACKAGE_URL "http://github.com/tpruvot/ccminer" - -/* Define to the version of this package. */ -#define PACKAGE_VERSION "2.2" - -/* If using the C implementation of alloca, define if you know the - direction of stack growth for your system; otherwise it will be - automatically deduced at runtime. - STACK_DIRECTION > 0 => grows toward higher addresses - STACK_DIRECTION < 0 => grows toward lower addresses - STACK_DIRECTION = 0 => direction of growth unknown */ -/* #undef STACK_DIRECTION */ - -/* Define to 1 if you have the ANSI C header files. */ -#define STDC_HEADERS 1 - -/* Define curl_free() as free() if our version of curl lacks curl_free. */ -/* #undef curl_free */ - -/* Define to `unsigned int' if does not define. */ -//#define size_t unsigned int - -#if !defined(HAVE_STRUCT_TIMESPEC) && _MSC_VER >= 1900 -#define HAVE_STRUCT_TIMESPEC -#endif +/* CONFIG ONLY FOR MS VC++ BUILD */ + +/* Define to one of `_getb67', `GETB67', `getb67' for Cray-2 and Cray-YMP + systems. This function is required for `alloca.c' support on those systems. + */ +/* #undef CRAY_STACKSEG_END */ + +/* Define to 1 if using `alloca.c'. */ +/* #undef C_ALLOCA */ + +/* Define to 1 if you have `alloca', as a function or macro. */ +#define HAVE_ALLOCA 1 + +/* Define to 1 if you have and it should be used (not on Ultrix). + */ +#define HAVE_ALLOCA_H 1 + +/* Define to 1 if you have the declaration of `be32dec', and to 0 if you + don't. */ +#define HAVE_DECL_BE32DEC 0 + +/* Define to 1 if you have the declaration of `be32enc', and to 0 if you + don't. */ +#define HAVE_DECL_BE32ENC 0 + +/* Define to 1 if you have the declaration of `le32dec', and to 0 if you + don't. */ +#define HAVE_DECL_LE32DEC 0 + +/* Define to 1 if you have the declaration of `le32enc', and to 0 if you + don't. */ +#define HAVE_DECL_LE32ENC 0 + +/* Define to 1 if you have the `getopt_long' function. */ +#define HAVE_GETOPT_LONG 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_INTTYPES_H 1 + +/* Define to 1 if you have the `crypto' library (-lcrypto). */ +#define HAVE_LIBCRYPTO 1 + +/* Define to 1 if you have a functional curl library. */ +#define HAVE_LIBCURL 1 + +/* Define to 1 if you have the `ssl' library (-lssl). */ +#define HAVE_LIBSSL 1 + +/* Define to 1 if you have the `z' library (-lz). */ +#define HAVE_LIBZ 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_MEMORY_H 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_STDINT_H 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_STDLIB_H 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_STRINGS_H 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_STRING_H 1 + +/* Define to 1 if you have the header file. */ +/* #undef HAVE_SYSLOG_H */ + +/* Define to 1 if you have the header file. */ +/* #undef HAVE_SYS_ENDIAN_H */ + +/* Define to 1 if you have the header file. */ +#define HAVE_SYS_PARAM_H 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_SYS_STAT_H 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_SYS_SYSCTL_H 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_SYS_TYPES_H 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_UNISTD_H 1 + +/* Defined if libcurl supports AsynchDNS */ +/* #undef LIBCURL_FEATURE_ASYNCHDNS */ + +/* Defined if libcurl supports IDN */ +#define LIBCURL_FEATURE_IDN 1 + +/* Defined if libcurl supports IPv6 */ +#define LIBCURL_FEATURE_IPV6 1 + +/* Defined if libcurl supports KRB4 */ +/* #undef LIBCURL_FEATURE_KRB4 */ + +/* Defined if libcurl supports libz */ +#define LIBCURL_FEATURE_LIBZ 1 + +/* Defined if libcurl supports NTLM */ +#define LIBCURL_FEATURE_NTLM 1 + +/* Defined if libcurl supports SSL */ +#define LIBCURL_FEATURE_SSL 1 + +/* Defined if libcurl supports SSPI */ +/* #undef LIBCURL_FEATURE_SSPI */ + +/* Defined if libcurl supports DICT */ +/* #undef LIBCURL_PROTOCOL_DICT */ + +/* Defined if libcurl supports FILE */ +#define LIBCURL_PROTOCOL_FILE 1 + +/* Defined if libcurl supports FTP */ +#define LIBCURL_PROTOCOL_FTP 1 + +/* Defined if libcurl supports FTPS */ +#define LIBCURL_PROTOCOL_FTPS 1 + +/* Defined if libcurl supports HTTP */ +#define LIBCURL_PROTOCOL_HTTP 1 + +/* Defined if libcurl supports HTTPS */ +#define LIBCURL_PROTOCOL_HTTPS 1 + +/* Defined if libcurl supports IMAP */ +/* #undef LIBCURL_PROTOCOL_IMAP */ + +/* Defined if libcurl supports LDAP */ +/* #undef LIBCURL_PROTOCOL_LDAP */ + +/* Defined if libcurl supports POP3 */ +/* #undef LIBCURL_PROTOCOL_POP3 */ + +/* Defined if libcurl supports RTSP */ +/* #undef LIBCURL_PROTOCOL_RTSP */ + +/* Defined if libcurl supports SMTP */ +/* #undef LIBCURL_PROTOCOL_SMTP */ + +/* Defined if libcurl supports TELNET */ +/* #undef LIBCURL_PROTOCOL_TELNET */ + +/* Defined if libcurl supports TFTP */ +/* #undef LIBCURL_PROTOCOL_TFTP */ + +/* Define to 1 if your C compiler doesn't accept -c and -o together. */ +/* #undef NO_MINUS_C_MINUS_O */ + +/* Name of package */ +#define PACKAGE "ccminer" + +/* Define to the address where bug reports for this package should be sent. */ +#define PACKAGE_BUGREPORT "" + +/* Define to the full name of this package. */ +#define PACKAGE_NAME "ccminer" + +/* Define to the home page for this package. */ +#define PACKAGE_URL "http://github.com/tpruvot/ccminer" + +/* Define to the version of this package. */ +#define PACKAGE_VERSION "2.2.1" + +/* If using the C implementation of alloca, define if you know the + direction of stack growth for your system; otherwise it will be + automatically deduced at runtime. + STACK_DIRECTION > 0 => grows toward higher addresses + STACK_DIRECTION < 0 => grows toward lower addresses + STACK_DIRECTION = 0 => direction of growth unknown */ +/* #undef STACK_DIRECTION */ + +/* Define to 1 if you have the ANSI C header files. */ +#define STDC_HEADERS 1 + +/* Define curl_free() as free() if our version of curl lacks curl_free. */ +/* #undef curl_free */ + +/* Define to `unsigned int' if does not define. */ +//#define size_t unsigned int + +#if !defined(HAVE_STRUCT_TIMESPEC) && _MSC_VER >= 1900 +#define HAVE_STRUCT_TIMESPEC +#endif diff --git a/tribus/tribus.cu b/tribus/tribus.cu index 25218e4b84..4516e7d69c 100644 --- a/tribus/tribus.cu +++ b/tribus/tribus.cu @@ -146,6 +146,7 @@ extern "C" int scanhash_tribus(int thr_id, struct work *work, uint32_t max_nonce gpu_increment_reject(thr_id); if (!opt_quiet) gpulog(LOG_WARNING, thr_id, "result for %08x does not validate on CPU!", work->nonces[0]); + cudaMemset(d_resNonce[thr_id], 0xFF, 2 * sizeof(uint32_t)); pdata[19] = work->nonces[0] + 1; continue; } diff --git a/x11/c11.cu b/x11/c11.cu index 5dee17455a..8f8f6663b2 100644 --- a/x11/c11.cu +++ b/x11/c11.cu @@ -18,10 +18,13 @@ extern "C" #include "cuda_helper.h" #include "cuda_x11.h" +void tribus_echo512_final(int thr_id, uint32_t threads, uint32_t *d_hash, uint32_t *d_resNonce, const uint64_t target); + #include #include static uint32_t *d_hash[MAX_GPUS]; +static uint32_t *d_resNonce[MAX_GPUS]; // Flax/Chaincoin C11 CPU Hash extern "C" void c11hash(void *output, const void *input) @@ -103,6 +106,7 @@ extern "C" void c11hash(void *output, const void *input) #endif static bool init[MAX_GPUS] = { 0 }; +static bool use_compat_kernels[MAX_GPUS] = { 0 }; extern "C" int scanhash_c11(int thr_id, struct work* work, uint32_t max_nonce, unsigned long *hashes_done) { @@ -118,7 +122,8 @@ extern "C" int scanhash_c11(int thr_id, struct work* work, uint32_t max_nonce, u if (!init[thr_id]) { - cudaSetDevice(device_map[thr_id]); + int dev_id = device_map[thr_id]; + cudaSetDevice(dev_id); if (opt_cudaschedule == -1 && gpu_threads == 1) { cudaDeviceReset(); // reduce cpu usage @@ -127,6 +132,9 @@ extern "C" int scanhash_c11(int thr_id, struct work* work, uint32_t max_nonce, u } gpulog(LOG_INFO, thr_id, "Intensity set to %g, %u cuda threads", throughput2intensity(throughput), throughput); + cuda_get_arch(thr_id); + use_compat_kernels[thr_id] = (cuda_arch[dev_id] < 500); + quark_blake512_cpu_init(thr_id, throughput); quark_bmw512_cpu_init(thr_id, throughput); quark_groestl512_cpu_init(thr_id, throughput); @@ -135,11 +143,13 @@ extern "C" int scanhash_c11(int thr_id, struct work* work, uint32_t max_nonce, u quark_jh512_cpu_init(thr_id, throughput); x11_luffaCubehash512_cpu_init(thr_id, throughput); x11_shavite512_cpu_init(thr_id, throughput); - x11_echo512_cpu_init(thr_id, throughput); + if (use_compat_kernels[thr_id]) + x11_echo512_cpu_init(thr_id, throughput); if (x11_simd512_cpu_init(thr_id, throughput) != 0) { return 0; } - CUDA_CALL_OR_RET_X(cudaMalloc(&d_hash[thr_id], 64 * throughput), 0); // why 64 ? + CUDA_CALL_OR_RET_X(cudaMalloc(&d_hash[thr_id], 64 * throughput), 0); + CUDA_SAFE_CALL(cudaMalloc(&d_resNonce[thr_id], 2 * sizeof(uint32_t))); cuda_check_cpu_init(thr_id, throughput); @@ -151,7 +161,10 @@ extern "C" int scanhash_c11(int thr_id, struct work* work, uint32_t max_nonce, u be32enc(&endiandata[k], pdata[k]); quark_blake512_cpu_setBlock_80(thr_id, endiandata); - cuda_check_cpu_setTarget(ptarget); + if (use_compat_kernels[thr_id]) + cuda_check_cpu_setTarget(ptarget); + else + cudaMemset(d_resNonce[thr_id], 0xFF, 2 * sizeof(uint32_t)); do { int order = 0; @@ -175,24 +188,32 @@ extern "C" int scanhash_c11(int thr_id, struct work* work, uint32_t max_nonce, u TRACE("shavite:"); x11_simd512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++); TRACE("simd :"); - x11_echo512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++); - TRACE("echo => "); + + if (use_compat_kernels[thr_id]) { + x11_echo512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++); + work->nonces[0] = cuda_check_hash(thr_id, throughput, pdata[19], d_hash[thr_id]); + work->nonces[1] = UINT32_MAX; + } else { + tribus_echo512_final(thr_id, throughput, d_hash[thr_id], d_resNonce[thr_id], AS_U64(&ptarget[6])); + cudaMemcpy(&work->nonces[0], d_resNonce[thr_id], 2 * sizeof(uint32_t), cudaMemcpyDeviceToHost); + } *hashes_done = pdata[19] - first_nonce + throughput; - work->nonces[0] = cuda_check_hash(thr_id, throughput, pdata[19], d_hash[thr_id]); if (work->nonces[0] != UINT32_MAX) { - const uint32_t Htarg = ptarget[7]; uint32_t _ALIGN(64) vhash[8]; + const uint32_t Htarg = ptarget[7]; + const uint32_t startNounce = pdata[19]; + if (!use_compat_kernels[thr_id]) work->nonces[0] += startNounce; be32enc(&endiandata[19], work->nonces[0]); c11hash(vhash, endiandata); if (vhash[7] <= Htarg && fulltest(vhash, ptarget)) { work->valid_nonces = 1; work_set_target_ratio(work, vhash); - work->nonces[1] = cuda_check_hash_suppl(thr_id, throughput, pdata[19], d_hash[thr_id], 1); - if (work->nonces[1] != 0) { + if (work->nonces[1] != UINT32_MAX) { + work->nonces[1] += startNounce; be32enc(&endiandata[19], work->nonces[1]); c11hash(vhash, endiandata); bn_set_target_ratio(work, vhash, 1); @@ -206,7 +227,8 @@ extern "C" int scanhash_c11(int thr_id, struct work* work, uint32_t max_nonce, u else if (vhash[7] > Htarg) { gpu_increment_reject(thr_id); if (!opt_quiet) - gpulog(LOG_WARNING, thr_id, "result for %08x does not validate on CPU!", work->nonces[0]); + gpulog(LOG_WARNING, thr_id, "result for %08x does not validate on CPU!", work->nonces[0]); + cudaMemset(d_resNonce[thr_id], 0xFF, 2 * sizeof(uint32_t)); pdata[19] = work->nonces[0] + 1; continue; } @@ -234,6 +256,8 @@ extern "C" void free_c11(int thr_id) cudaThreadSynchronize(); cudaFree(d_hash[thr_id]); + cudaFree(d_resNonce[thr_id]); + quark_blake512_cpu_free(thr_id); quark_groestl512_cpu_free(thr_id); x11_simd512_cpu_free(thr_id);