Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Make tests less flaky on CI #716

Merged
merged 3 commits into from
Apr 24, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 4 additions & 3 deletions kazoo/testing/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -221,8 +221,9 @@ def run(self):
)
self.process = subprocess.Popen(args=args)
log.info(
"Started zookeeper process %s using args %s",
"Started zookeeper process %s on port %s using args %s",
self.process.pid,
self.server_info.client_port,
args,
)
self._running = True
Expand Down Expand Up @@ -304,12 +305,12 @@ def destroy(self):

shutil.rmtree(self.working_path, True)

def get_logs(self):
def get_logs(self, num_lines=100):
log_path = pathlib.Path(self.working_path, "zookeeper.log")
if log_path.exists():
log_file = log_path.open("r")
lines = log_file.readlines()
return lines[-100:]
return lines[-num_lines:]
return []


Expand Down
35 changes: 33 additions & 2 deletions kazoo/testing/harness.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
"ZOOKEEPER_OBSERVER_START_ID": -1,
"ZOOKEEPER_LOCAL_SESSION_RO": "false",
}
MAX_INIT_TRIES = 5


def get_global_cluster():
Expand Down Expand Up @@ -166,6 +167,9 @@ def __init__(self, *args, **kw):
def cluster(self):
return get_global_cluster()

def log(self, level, msg, *args, **kwargs):
log.log(level, msg, *args, **kwargs)

@property
def servers(self):
return ",".join([s.address for s in self.cluster])
Expand Down Expand Up @@ -208,8 +212,29 @@ def setup_zookeeper(self, **client_options):
self.cluster.start()
namespace = "/kazootests" + uuid.uuid4().hex
self.hosts = self.servers + namespace
if "timeout" not in client_options:
client_options["timeout"] = self.DEFAULT_CLIENT_TIMEOUT

tries = 0
while True:
try:
client_cluster_health = self._get_client()
client_cluster_health.start()
client_cluster_health.ensure_path("/")
client_cluster_health.stop()
self.log(logging.INFO, "cluster looks ready to go")
break
except Exception:
tries += 1
if tries >= MAX_INIT_TRIES:
raise
if tries > 0 and tries % 2 == 0:
self.log(
logging.WARNING,
"nuking current cluster and making another one",
)
self.cluster.terminate()
self.cluster.start()
StephenSorriaux marked this conversation as resolved.
Show resolved Hide resolved
continue

self.client = self._get_client(**client_options)
self.client.start()
self.client.ensure_path("/")
Expand Down Expand Up @@ -259,3 +284,9 @@ def setUp(self):

def tearDown(self):
self.teardown_zookeeper()

@classmethod
def tearDownClass(cls):
cluster = get_global_cluster()
StephenSorriaux marked this conversation as resolved.
Show resolved Hide resolved
if cluster is not None:
cluster.terminate()
4 changes: 3 additions & 1 deletion kazoo/tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,10 @@


def pytest_exception_interact(node, call, report):
if hasattr(node._testcase, "cluster"):
jeffwidman marked this conversation as resolved.
Show resolved Hide resolved
try:
cluster = node._testcase.cluster
log.error("Zookeeper cluster logs:")
for logs in cluster.get_logs():
log.error(logs)
except Exception:
log.exception("Cannot get ZK logs:")
21 changes: 17 additions & 4 deletions kazoo/tests/test__connection.py → kazoo/tests/test_connection.py
StephenSorriaux marked this conversation as resolved.
Show resolved Hide resolved
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,7 @@
from kazoo.protocol.states import KazooState
from kazoo.protocol.connection import _CONNECTION_DROP
from kazoo.testing import KazooTestCase
from kazoo.tests.util import wait
from kazoo.tests.util import CI_ZK_VERSION
from kazoo.tests.util import wait, CI_ZK_VERSION, CI


class Delete(namedtuple("Delete", "path version")):
Expand Down Expand Up @@ -258,7 +257,7 @@ def back(state):
class TestReadOnlyMode(KazooTestCase):
def setUp(self):
os.environ["ZOOKEEPER_LOCAL_SESSION_RO"] = "true"
self.setup_zookeeper(read_only=True)
self.setup_zookeeper()
jeffwidman marked this conversation as resolved.
Show resolved Hide resolved
skip = False
if CI_ZK_VERSION and CI_ZK_VERSION < (3, 4):
skip = True
Expand All @@ -279,7 +278,15 @@ def test_read_only(self):
from kazoo.exceptions import NotReadOnlyCallError
from kazoo.protocol.states import KeeperState

client = self.client
if CI:
# force some wait to make sure the data produced during the
# `setUp()` step are replicaed to all zk members
StephenSorriaux marked this conversation as resolved.
Show resolved Hide resolved
# if not done the `get_children()` test may fail because the
# node does not exist on the node that we will keep alive
time.sleep(15)
# do not keep the client started in the `setUp` step alive
self.client.stop()
client = self._get_client(connection_retry=None, read_only=True)
states = []
ev = threading.Event()

Expand All @@ -289,6 +296,7 @@ def listen(state):
if client.client_state == KeeperState.CONNECTED_RO:
ev.set()

client.start()
try:
# stopping both nodes at the same time
# else the test seems flaky when on CI hosts
Expand All @@ -303,6 +311,11 @@ def listen(state):
thread.start()
for thread in zk_stop_threads:
thread.join()
# stopping the client is *mandatory*, else the client might try to
# reconnect using a xid that the server may endlessly refuse
# restarting the client makes sure the xid gets reset
client.stop()
client.start()
ev.wait(15)
assert ev.is_set()
assert client.client_state == KeeperState.CONNECTED_RO
Expand Down
6 changes: 5 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,10 @@ extend-exclude = '''

[tool.pytest.ini_options]
addopts = "-ra -v"
log_cli = true
log_cli_date_format = "%Y-%m-%d %H:%M:%S"
log_cli_format = "%(asctime)s %(levelname)s %(message)s"
log_cli_level = "INFO"

[tool.mypy]

Expand Down Expand Up @@ -114,11 +118,11 @@ module = [
'kazoo.testing.common',
'kazoo.testing.harness',
'kazoo.tests.conftest',
'kazoo.tests.test__connection',
'kazoo.tests.test_barrier',
'kazoo.tests.test_build',
'kazoo.tests.test_cache',
'kazoo.tests.test_client',
'kazoo.tests.test_connection',
'kazoo.tests.test_counter',
'kazoo.tests.test_election',
'kazoo.tests.test_eventlet_handler',
Expand Down