From 245f1c33c860180c53f61329166726f71b067c64 Mon Sep 17 00:00:00 2001 From: Stephen Sorriaux Date: Mon, 10 Apr 2023 16:26:41 -0400 Subject: [PATCH 1/3] fix(tests): `test_read_only` is not flaky anymore. Fixed the `test_read_only` test based on what is done in the official Java client, we were missing some things and that led the test to fail from time to time (see comments in the code for full story). Also renamed `test__connection.py` to `test_connection.py`, where it belongs. --- ...test__connection.py => test_connection.py} | 21 +++++++++++++++---- pyproject.toml | 2 +- 2 files changed, 18 insertions(+), 5 deletions(-) rename kazoo/tests/{test__connection.py => test_connection.py} (93%) diff --git a/kazoo/tests/test__connection.py b/kazoo/tests/test_connection.py similarity index 93% rename from kazoo/tests/test__connection.py rename to kazoo/tests/test_connection.py index 80a777e8..032b94bb 100644 --- a/kazoo/tests/test__connection.py +++ b/kazoo/tests/test_connection.py @@ -18,8 +18,7 @@ from kazoo.protocol.states import KazooState from kazoo.protocol.connection import _CONNECTION_DROP from kazoo.testing import KazooTestCase -from kazoo.tests.util import wait -from kazoo.tests.util import CI_ZK_VERSION +from kazoo.tests.util import wait, CI_ZK_VERSION, CI class Delete(namedtuple("Delete", "path version")): @@ -258,7 +257,7 @@ def back(state): class TestReadOnlyMode(KazooTestCase): def setUp(self): os.environ["ZOOKEEPER_LOCAL_SESSION_RO"] = "true" - self.setup_zookeeper(read_only=True) + self.setup_zookeeper() skip = False if CI_ZK_VERSION and CI_ZK_VERSION < (3, 4): skip = True @@ -279,7 +278,15 @@ def test_read_only(self): from kazoo.exceptions import NotReadOnlyCallError from kazoo.protocol.states import KeeperState - client = self.client + if CI: + # force some wait to make sure the data produced during the + # `setUp()` step are replicaed to all zk members + # if not done the `get_children()` test may fail because the + # node does not exist on the node that we will keep alive + time.sleep(15) + # do not keep the client started in the `setUp` step alive + self.client.stop() + client = self._get_client(connection_retry=None, read_only=True) states = [] ev = threading.Event() @@ -289,6 +296,7 @@ def listen(state): if client.client_state == KeeperState.CONNECTED_RO: ev.set() + client.start() try: # stopping both nodes at the same time # else the test seems flaky when on CI hosts @@ -303,6 +311,11 @@ def listen(state): thread.start() for thread in zk_stop_threads: thread.join() + # stopping the client is *mandatory*, else the client might try to + # reconnect using a xid that the server may endlessly refuse + # restarting the client makes sure the xid gets reset + client.stop() + client.start() ev.wait(15) assert ev.is_set() assert client.client_state == KeeperState.CONNECTED_RO diff --git a/pyproject.toml b/pyproject.toml index b7d67f73..39d7d62f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -114,11 +114,11 @@ module = [ 'kazoo.testing.common', 'kazoo.testing.harness', 'kazoo.tests.conftest', - 'kazoo.tests.test__connection', 'kazoo.tests.test_barrier', 'kazoo.tests.test_build', 'kazoo.tests.test_cache', 'kazoo.tests.test_client', + 'kazoo.tests.test_connection', 'kazoo.tests.test_counter', 'kazoo.tests.test_election', 'kazoo.tests.test_eventlet_handler', From 7892edde6068f7b6b319deba47ae53cfa7801a52 Mon Sep 17 00:00:00 2001 From: Stephen Sorriaux Date: Mon, 10 Apr 2023 17:10:50 -0400 Subject: [PATCH 2/3] fix(testing): cleanup ZK cluster between test cases Now performing a ZK cluster nuking at the end of each test case since keeping the same ZK state from test cases to test cases actually led to some tests randomly failing. Added `client_cluster_health` ZK client in the `setup_zookeeper()` step that is there to "guarantee" the cluster is up and running, without even considering the configuration that will be used by the test itself. --- kazoo/testing/harness.py | 32 ++++++++++++++++++++++++++++++-- 1 file changed, 30 insertions(+), 2 deletions(-) diff --git a/kazoo/testing/harness.py b/kazoo/testing/harness.py index 2d28a5b1..880b1d9e 100644 --- a/kazoo/testing/harness.py +++ b/kazoo/testing/harness.py @@ -21,6 +21,7 @@ "ZOOKEEPER_OBSERVER_START_ID": -1, "ZOOKEEPER_LOCAL_SESSION_RO": "false", } +MAX_INIT_TRIES = 5 def get_global_cluster(): @@ -208,8 +209,29 @@ def setup_zookeeper(self, **client_options): self.cluster.start() namespace = "/kazootests" + uuid.uuid4().hex self.hosts = self.servers + namespace - if "timeout" not in client_options: - client_options["timeout"] = self.DEFAULT_CLIENT_TIMEOUT + + tries = 0 + while True: + try: + client_cluster_health = self._get_client() + client_cluster_health.start() + client_cluster_health.ensure_path("/") + client_cluster_health.stop() + self.log(logging.INFO, "cluster looks ready to go") + break + except Exception: + tries += 1 + if tries >= MAX_INIT_TRIES: + raise + if tries > 0 and tries % 2 == 0: + self.log( + logging.WARNING, + "nuking current cluster and making another one", + ) + self.cluster.terminate() + self.cluster.start() + continue + self.client = self._get_client(**client_options) self.client.start() self.client.ensure_path("/") @@ -259,3 +281,9 @@ def setUp(self): def tearDown(self): self.teardown_zookeeper() + + @classmethod + def tearDownClass(cls): + cluster = get_global_cluster() + if cluster is not None: + cluster.terminate() From 64af69879c095cef01ad7709891cc211173b1c19 Mon Sep 17 00:00:00 2001 From: Stephen Sorriaux Date: Mon, 10 Apr 2023 17:13:59 -0400 Subject: [PATCH 3/3] feat(testing): add more logging during test suite Activate `pytest`'s `logcli` setting to better read the client lifecycle during testing. Add `log()` function to KazooTestHarness class so that it is possible to log like crazy when something is not working Display the ZK client port in logs when starting a ZK server (useful for test "debug") Be able to get more than the last 100 lines of ZK logs (can be useful, believe me) --- kazoo/testing/common.py | 7 ++++--- kazoo/testing/harness.py | 3 +++ kazoo/tests/conftest.py | 4 +++- pyproject.toml | 4 ++++ 4 files changed, 14 insertions(+), 4 deletions(-) diff --git a/kazoo/testing/common.py b/kazoo/testing/common.py index 7918221b..a3491993 100644 --- a/kazoo/testing/common.py +++ b/kazoo/testing/common.py @@ -221,8 +221,9 @@ def run(self): ) self.process = subprocess.Popen(args=args) log.info( - "Started zookeeper process %s using args %s", + "Started zookeeper process %s on port %s using args %s", self.process.pid, + self.server_info.client_port, args, ) self._running = True @@ -304,12 +305,12 @@ def destroy(self): shutil.rmtree(self.working_path, True) - def get_logs(self): + def get_logs(self, num_lines=100): log_path = pathlib.Path(self.working_path, "zookeeper.log") if log_path.exists(): log_file = log_path.open("r") lines = log_file.readlines() - return lines[-100:] + return lines[-num_lines:] return [] diff --git a/kazoo/testing/harness.py b/kazoo/testing/harness.py index 880b1d9e..d92a3fc7 100644 --- a/kazoo/testing/harness.py +++ b/kazoo/testing/harness.py @@ -167,6 +167,9 @@ def __init__(self, *args, **kw): def cluster(self): return get_global_cluster() + def log(self, level, msg, *args, **kwargs): + log.log(level, msg, *args, **kwargs) + @property def servers(self): return ",".join([s.address for s in self.cluster]) diff --git a/kazoo/tests/conftest.py b/kazoo/tests/conftest.py index eef9965d..931fd84f 100644 --- a/kazoo/tests/conftest.py +++ b/kazoo/tests/conftest.py @@ -4,8 +4,10 @@ def pytest_exception_interact(node, call, report): - if hasattr(node._testcase, "cluster"): + try: cluster = node._testcase.cluster log.error("Zookeeper cluster logs:") for logs in cluster.get_logs(): log.error(logs) + except Exception: + log.exception("Cannot get ZK logs:") diff --git a/pyproject.toml b/pyproject.toml index 39d7d62f..f474736c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -21,6 +21,10 @@ extend-exclude = ''' [tool.pytest.ini_options] addopts = "-ra -v" +log_cli = true +log_cli_date_format = "%Y-%m-%d %H:%M:%S" +log_cli_format = "%(asctime)s %(levelname)s %(message)s" +log_cli_level = "INFO" [tool.mypy]