-
Notifications
You must be signed in to change notification settings - Fork 6.8k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
tests: benchmarks: move pthread_pressure to benchmarks/posix
The pthread_pressure test was not a typical test per se. It was a benchmark in search of the proper home. Let's move it to the correct place in the Zephyr tree, add a doc, and provide some reporting. Currently, k_threads out-perform pthreads by almost a factor of 2. The theoretical maximum performance of pthreads would be at parity of k_threads, since pthreads are a wrapper around kernel threads. It would be great to reduce the gap. Signed-off-by: Chris Friedt <[email protected]>
- Loading branch information
Showing
10 changed files
with
327 additions
and
268 deletions.
There are no files selected for viewing
File renamed without changes.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,45 @@ | ||
POSIX Thread Benchmark | ||
###################### | ||
|
||
Overview | ||
******** | ||
|
||
This benchmark creates and joins as many threads as possible within a configurable time window. | ||
It provides a rough comparison of Zephyr's POSIX threads (pthreads) API with Zephyr's kernel threads | ||
(k_threads) API, highlighting the overhead of the POSIX layer. Ideally, this overhead would shrink over | ||
time. | ||
|
||
Sample output of the benchmark:: | ||
|
||
*** Booting Zephyr OS build v4.0.0-1410-gfca33facee37 *** | ||
ASSERT: y | ||
BOARD: qemu_riscv64 | ||
NUM_CPUS: 1 | ||
TEST_DELAY_US: 0 | ||
TEST_DURATION_S: 5 | ||
SMP: n | ||
API, Thread ID, time(s), threads, cores, rate (threads/s/core) | ||
k_thread, ALL, 5, 47663, 1, 9532 | ||
pthread, ALL, 5, 28180, 1, 5636 | ||
PROJECT EXECUTION SUCCESSFUL | ||
|
||
To observe periodic statistics on a per-thread basis in addition to the summary of statistics | ||
printed at the end of execution, use CONFIG_TEST_PERIODIC_STATS. | ||
|
||
Several other options can be tuned on an as-needed basis: | ||
|
||
- CONFIG_MP_MAX_NUM_CPUS - Number of CPUs to use in parallel. | ||
- CONFIG_TEST_DURATION_S - Number of seconds to run the test. | ||
- CONFIG_TEST_DELAY_US - Microseconds to delay between pthread join and create. | ||
- CONFIG_TEST_KTHREADS - Exercise k_threads in the test app. | ||
- CONFIG_TEST_PTHREADS - Exercise pthreads in the test app. | ||
- CONFIG_TEST_STACK_SIZE - Size of each thread stack in this test. | ||
|
||
The following table summarizes the purposes of the different extra | ||
configuration files that are available to be used with this benchmark. | ||
A tester may mix and match them, allowing different scenarios to | ||
be easily compared against the default. | ||
|
||
+-----------------------------+----------------------------------------+ | ||
| prj-assert.conf | Enable assertions for API verification | | ||
+-----------------------------+----------------------------------------+ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
CONFIG_FORCE_NO_ASSERT=n | ||
CONFIG_ASSERT=y | ||
|
||
# May be enabled for GitHub CI to reduce host scheduling noise while running | ||
# several concurrent Qemu processes each under stressful SMP load. | ||
# CONFIG_PTHREAD_CREATE_BARRIER=y |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
CONFIG_TEST=y | ||
CONFIG_FORCE_NO_ASSERT=y | ||
|
||
CONFIG_POSIX_API=y | ||
CONFIG_POSIX_AEP_CHOICE_BASE=y | ||
CONFIG_POSIX_PRIORITY_SCHEDULING=y |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,243 @@ | ||
/* | ||
* Copyright (c) 2023, Meta | ||
* Copyright (c) 2024, Tenstorrent AI ULC | ||
* | ||
* SPDX-License-Identifier: Apache-2.0 | ||
*/ | ||
|
||
#include <pthread.h> | ||
#include <stdio.h> | ||
|
||
#include <zephyr/sys/__assert.h> | ||
#include <zephyr/sys/util.h> | ||
|
||
#define STACK_SIZE K_THREAD_STACK_LEN(CONFIG_TEST_STACK_SIZE) | ||
|
||
/* update interval for printing stats */ | ||
#if CONFIG_TEST_DURATION_S >= 60 | ||
#define UPDATE_INTERVAL_S 10 | ||
#elif CONFIG_TEST_DURATION_S >= 30 | ||
#define UPDATE_INTERVAL_S 5 | ||
#else | ||
#define UPDATE_INTERVAL_S 1 | ||
#endif | ||
|
||
/* 32 threads is mainly a limitation of find_lsb_set() */ | ||
#define NUM_CPUS MIN(32, MIN(CONFIG_MP_MAX_NUM_CPUS, CONFIG_POSIX_THREAD_THREADS_MAX)) | ||
|
||
typedef int (*create_fn)(int i); | ||
typedef int (*join_fn)(int i); | ||
|
||
static void before(void); | ||
|
||
/* per-thread flags: set by a worker when it runs, cleared once it has been joined */ | ||
static bool alive[NUM_CPUS]; | ||
|
||
/* array of thread stacks */ | ||
static K_THREAD_STACK_ARRAY_DEFINE(thread_stacks, NUM_CPUS, STACK_SIZE); | ||
|
||
static struct k_thread k_threads[NUM_CPUS]; | ||
static uint64_t counters[NUM_CPUS]; | ||
static uint64_t prev_counters[NUM_CPUS]; | ||
|
||
static void print_stats(const char *tag, uint64_t now, uint64_t end) | ||
{ | ||
for (int i = 0; i < NUM_CPUS; ++i) { | ||
printf("%s, %d, %u, %llu, 1, %llu\n", tag, i, UPDATE_INTERVAL_S, counters[i], | ||
(counters[i] - prev_counters[i]) / UPDATE_INTERVAL_S); | ||
prev_counters[i] = counters[i]; | ||
} | ||
} | ||
|
||
static void print_group_stats(const char *tag) | ||
{ | ||
uint64_t count = 0; | ||
|
||
for (int i = 0; i < NUM_CPUS; ++i) { | ||
count += counters[i]; | ||
} | ||
|
||
printf("%s, ALL, %u, %llu, %u, %llu\n", tag, CONFIG_TEST_DURATION_S, count, NUM_CPUS, | ||
count / CONFIG_TEST_DURATION_S / NUM_CPUS); | ||
} | ||
|
||
/*
 * Repeatedly create and join NUM_CPUS workers until CONFIG_TEST_DURATION_S
 * seconds have elapsed, then print the aggregate statistics for the run.
 *
 * tag:    API name used as a prefix in assertion messages and statistics
 * create: spawns worker i; expected to return 0 on success
 * join:   joins worker i; expected to return 0 on success
 *
 * A worker sets alive[i] when it runs. The polling loop joins every worker
 * whose flag is set, counts the completed (create, join) pair in counters[i]
 * and immediately spawns a replacement.
 *
 * NOTE(review): the workers spawned last are not joined before returning -
 * confirm that this is acceptable when the stacks are reused by a later run.
 */
static void create_join_common(const char *tag, create_fn create, join_fn join)
{
	int i;
	int __maybe_unused ret;
	uint64_t now_ms = k_uptime_get();
	const uint64_t end_ms = now_ms + MSEC_PER_SEC * CONFIG_TEST_DURATION_S;
	uint64_t update_ms = now_ms + MSEC_PER_SEC * UPDATE_INTERVAL_S;

	for (i = 0; i < NUM_CPUS; ++i) {
		/* spawn thread i */
		prev_counters[i] = 0;
		ret = create(i);
		__ASSERT(ret == 0, "%s_create(%d)[%llu] failed: %d", tag, i,
			 (unsigned long long)counters[i], ret);
	}

	do {
		if (!IS_ENABLED(CONFIG_SMP)) {
			/* allow the test thread to be swapped-out */
			k_yield();
		}

		for (i = 0; i < NUM_CPUS; ++i) {
			if (!alive[i]) {
				continue;
			}

			/* a successful join returns 0, just like create */
			ret = join(i);
			__ASSERT(ret == 0, "%s_join(%d)[%llu] failed: %d", tag, i,
				 (unsigned long long)counters[i], ret);
			alive[i] = false;

			/* update counter i after each (create,join) pair */
			++counters[i];

			/*
			 * CONFIG_TEST_DELAY_US is an integer option, so compare it
			 * numerically: IS_ENABLED() is only true for a value of exactly 1.
			 */
			if (CONFIG_TEST_DELAY_US > 0) {
				/* success with 0 delay means we are ~raceless */
				k_busy_wait(CONFIG_TEST_DELAY_US);
			}

			/* re-spawn thread i */
			ret = create(i);
			__ASSERT(ret == 0, "%s_create(%d)[%llu] failed: %d", tag, i,
				 (unsigned long long)counters[i], ret);
		}

		/* are we there yet? */
		now_ms = k_uptime_get();

		/* dump some stats periodically */
		if (now_ms > update_ms) {
			update_ms += MSEC_PER_SEC * UPDATE_INTERVAL_S;

			/* at this point, we should have seen many context switches */
			for (i = 0; IS_ENABLED(CONFIG_ASSERT) && i < NUM_CPUS; ++i) {
				__ASSERT(counters[i] > 0, "%s %d was never scheduled", tag, i);
			}

			if (IS_ENABLED(CONFIG_TEST_PERIODIC_STATS)) {
				print_stats(tag, now_ms, end_ms);
			}
		}
		Z_SPIN_DELAY(100);
	} while (end_ms > now_ms);

	print_group_stats(tag);
}
|
||
/* | ||
* Wrappers for k_threads | ||
*/ | ||
|
||
static void k_thread_fun(void *arg1, void *arg2, void *arg3) | ||
{ | ||
int i = POINTER_TO_INT(arg1); | ||
|
||
alive[i] = true; | ||
} | ||
|
||
static int k_thread_create_wrapper(int i) | ||
{ | ||
k_thread_create(&k_threads[i], thread_stacks[i], STACK_SIZE, k_thread_fun, | ||
INT_TO_POINTER(i), NULL, NULL, K_HIGHEST_APPLICATION_THREAD_PRIO, 0, | ||
K_NO_WAIT); | ||
|
||
return 0; | ||
} | ||
|
||
static int k_thread_join_wrapper(int i) | ||
{ | ||
return k_thread_join(&k_threads[i], K_FOREVER); | ||
} | ||
|
||
static void create_join_kthread(void) | ||
{ | ||
if (IS_ENABLED(CONFIG_TEST_KTHREADS)) { | ||
before(); | ||
create_join_common("k_thread", k_thread_create_wrapper, k_thread_join_wrapper); | ||
} | ||
} | ||
|
||
/* | ||
* Wrappers for pthreads | ||
*/ | ||
|
||
static pthread_t pthreads[NUM_CPUS]; | ||
static pthread_attr_t pthread_attrs[NUM_CPUS]; | ||
|
||
static void *pthread_fun(void *arg) | ||
{ | ||
k_thread_fun(arg, NULL, NULL); | ||
return NULL; | ||
} | ||
|
||
static int pthread_create_wrapper(int i) | ||
{ | ||
return pthread_create(&pthreads[i], &pthread_attrs[i], pthread_fun, INT_TO_POINTER(i)); | ||
} | ||
|
||
static int pthread_join_wrapper(int i) | ||
{ | ||
return pthread_join(pthreads[i], NULL); | ||
} | ||
|
||
static void create_join_pthread(void) | ||
{ | ||
if (IS_ENABLED(CONFIG_TEST_PTHREADS)) { | ||
before(); | ||
create_join_common("pthread", pthread_create_wrapper, pthread_join_wrapper); | ||
} | ||
} | ||
|
||
static void setup(void) | ||
{ | ||
printf("ASSERT: %c\n", IS_ENABLED(CONFIG_ASSERT) ? 'y' : 'n'); | ||
printf("BOARD: %s\n", CONFIG_BOARD); | ||
printf("NUM_CPUS: %u\n", NUM_CPUS); | ||
printf("TEST_DELAY_US: %u\n", CONFIG_TEST_DELAY_US); | ||
printf("TEST_DURATION_S: %u\n", CONFIG_TEST_DURATION_S); | ||
printf("SMP: %c\n", IS_ENABLED(CONFIG_SMP) ? 'y' : 'n'); | ||
|
||
printf("API, Thread ID, time(s), threads, cores, rate (threads/s/core)\n"); | ||
|
||
if (IS_ENABLED(CONFIG_TEST_PTHREADS)) { | ||
int __maybe_unused ret; | ||
const struct sched_param param = { | ||
.sched_priority = sched_get_priority_max(SCHED_FIFO), | ||
}; | ||
|
||
/* setup pthread stacks */ | ||
for (int i = 0; i < NUM_CPUS; ++i) { | ||
ret = pthread_attr_init(&pthread_attrs[i]); | ||
__ASSERT(ret == 0, "pthread_attr_init[%d] failed: %d", i, ret); | ||
|
||
ret = pthread_attr_setstack(&pthread_attrs[i], thread_stacks[i], | ||
STACK_SIZE); | ||
__ASSERT(ret == 0, "pthread_attr_setstack[%d] failed: %d", i, ret); | ||
|
||
ret = pthread_attr_setschedpolicy(&pthread_attrs[i], SCHED_FIFO); | ||
__ASSERT(ret == 0, "pthread_attr_setschedpolicy[%d] failed: %d", i, ret); | ||
|
||
ret = pthread_attr_setschedparam(&pthread_attrs[i], ¶m); | ||
__ASSERT(ret == 0, "pthread_attr_setschedparam[%d] failed: %d", i, ret); | ||
} | ||
} | ||
} | ||
|
||
static void before(void) | ||
{ | ||
for (int i = 0; i < NUM_CPUS; ++i) { | ||
counters[i] = 0; | ||
} | ||
} | ||
|
||
/*
 * Benchmark entry point: print the configuration, run the kernel-thread and
 * pthread benchmarks in that order, then print the success marker line.
 */
int main(void)
{
	setup();

	/* each run is a no-op when its Kconfig option is disabled */
	create_join_kthread();
	create_join_pthread();

	printf("PROJECT EXECUTION SUCCESSFUL\n");

	return 0;
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,21 @@ | ||
common: | ||
tags: | ||
- posix | ||
- benchmark | ||
min_ram: 64 | ||
arch_exclude: | ||
- posix | ||
integration_platforms: | ||
- qemu_cortex_a53/qemu_cortex_a53/smp | ||
- qemu_riscv64/qemu_virt_riscv64/smp | ||
- qemu_riscv32/qemu_virt_riscv32/smp | ||
- qemu_x86_64 | ||
harness: console | ||
harness_config: | ||
type: one_line | ||
record: | ||
regex: "(?P<api>.*), ALL, (?P<time>.*), (?P<threads>.*), (?P<cores>.*), (?P<rate>.*)" | ||
regex: | ||
- "PROJECT EXECUTION SUCCESSFUL" | ||
tests: | ||
benchmark.posix.threads: {} |
This file was deleted.
Oops, something went wrong.
Oops, something went wrong.