Skip to content

Commit

Permalink
CP-49775 convert SMGC to systemd service
Browse files Browse the repository at this point in the history
Add a templated oneshot service and use systemctl to start it. The
systemctl command is invoked with "--no-wait" so that it returns
immediately because the "oneshot" nature of the service would otherwise
wait until the command completes.

Note that because the service is a oneshot with RemainAfterExit=no (so
we can multiply start it), it will go from "activating" to "dead"
without ever appearing to be "running".

Signed-off-by: Tim Smith <[email protected]>
  • Loading branch information
Tim Smith committed Jun 14, 2024
1 parent 2b6bac9 commit 1b905ac
Show file tree
Hide file tree
Showing 6 changed files with 50 additions and 11 deletions.
2 changes: 2 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -167,6 +167,8 @@ install: precheck
$(SM_STAGING)/$(SYSTEMD_SERVICE_DIR)
install -m 644 systemd/sr_health_check.timer \
$(SM_STAGING)/$(SYSTEMD_SERVICE_DIR)
install -m 644 systemd/[email protected] \
$(SM_STAGING)/$(SYSTEMD_SERVICE_DIR)
for i in $(UDEV_RULES); do \
install -m 644 udev/$$i.rules \
$(SM_STAGING)$(UDEV_RULES_DIR); done
Expand Down
2 changes: 1 addition & 1 deletion drivers/FileSR.py
Original file line number Diff line number Diff line change
Expand Up @@ -379,7 +379,7 @@ def _process_replay(self, data):

def _kickGC(self):
util.SMlog("Kicking GC")
cleanup.start_gc(self.session, self.uuid)
cleanup.start_gc_service(self.uuid)

def _isbind(self):
# os.path.ismount can't deal with bind mount
Expand Down
4 changes: 2 additions & 2 deletions drivers/LVHDSR.py
Original file line number Diff line number Diff line change
Expand Up @@ -892,7 +892,7 @@ def _handleInterruptedCoalesceLeaf(self):
entries = self.journaler.getAll(cleanup.VDI.JRN_LEAF)
if len(entries) > 0:
util.SMlog("*** INTERRUPTED COALESCE-LEAF OP DETECTED ***")
cleanup.gc_force(self.session, self.uuid)
cleanup.start_gc_service(self.uuid, True)
self.lvmCache.refresh()

def _handleInterruptedCloneOp(self, origUuid, jval, forceUndo=False):
Expand Down Expand Up @@ -1290,7 +1290,7 @@ def _prepareTestMode(self):

def _kickGC(self):
util.SMlog("Kicking GC")
cleanup.start_gc(self.session, self.uuid)
cleanup.start_gc_service(self.uuid)

def ensureCBTSpace(self):
# Ensure we have space for at least one LV
Expand Down
40 changes: 32 additions & 8 deletions drivers/cleanup.py
Original file line number Diff line number Diff line change
Expand Up @@ -3206,6 +3206,10 @@ def gc(session, srUuid, inBackground, dryRun=False):


def start_gc(session, sr_uuid):
"""
This function is used to try to start a backgrounded GC session by forking
the current process. If using the systemd version, call start_gc_service() instead.
"""
# don't bother if an instance already running (this is just an
# optimization to reduce the overhead of forking a new process if we
# don't have to, but the process will check the lock anyways)
Expand All @@ -3230,6 +3234,24 @@ def start_gc(session, sr_uuid):
subprocess.run([__file__, '-b', '-u', sr_uuid, '-g'],
stdout=subprocess.PIPE, stderr=subprocess.PIPE, close_fds=True)

def start_gc_service(sr_uuid, wait=False):
"""
This starts the templated systemd service which runs GC on the given SR UUID.
If the service was already started, this is a no-op.
Because the service is a one-shot with RemainAfterExit=no, when called with
wait=True this will run the service synchronously and will not return until the
run has finished. This is used to force a run of the GC instead of just kicking it
in the background.
"""
sr_uuid_esc = sr_uuid.replace("-", "\\x2d")
util.SMlog(f"Kicking SMGC@{sr_uuid}...")
cmd=[ "/usr/bin/systemctl", "--quiet" ]
if not wait:
cmd.append("--no-block")
cmd += ["start", f"SMGC@{sr_uuid_esc}"]
subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, close_fds=True)


def gc_force(session, srUuid, force=False, dryRun=False, lockSR=False):
"""Garbage collect all deleted VDIs in SR "srUuid". The caller must ensure
Expand Down Expand Up @@ -3263,15 +3285,17 @@ def gc_force(session, srUuid, force=False, dryRun=False, lockSR=False):


def get_state(srUuid):
"""Return whether GC/coalesce is currently running or not. The information
is not guaranteed for any length of time if the call is not protected by
locking.
"""Return whether GC/coalesce is currently running or not. This asks systemd for
the state of the templated SMGC service and will return True if it is "activating"
or "running" (for completeness, as in practice it will never achieve the latter state)
"""
init(srUuid)
if lockGCActive.acquireNoblock():
lockGCActive.release()
return False
return True
sr_uuid_esc = srUuid.replace("-", "\\x2d")
cmd=[ "/usr/bin/systemctl", "is-active", f"SMGC@{sr_uuid_esc}"]
result = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, close_fds=True)
state = result.stdout.decode('utf-8').rstrip()
if state == "activating" or state == "running":
return True
return False


def should_preempt(session, srUuid):
Expand Down
1 change: 1 addition & 0 deletions mk/sm.spec.in
Original file line number Diff line number Diff line change
Expand Up @@ -212,6 +212,7 @@ tests/run_python_unittests.sh
%{_unitdir}/usb-scan.socket
%{_unitdir}/mpathcount.service
%{_unitdir}/mpathcount.socket
%{_unitdir}/[email protected]
%config /etc/udev/rules.d/65-multipath.rules
%config /etc/udev/rules.d/55-xs-mpath-scsidev.rules
%config /etc/udev/rules.d/58-xapi.rules
Expand Down
12 changes: 12 additions & 0 deletions systemd/[email protected]
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
[Unit]
Description=Garbage Collector for SR %I
DefaultDependencies=no

[Service]
Type=oneshot
Restart=no
ExecStart=/opt/xensource/sm/cleanup.py -g -u %I
# This is the default, but just to make it clear we may run this
# service multiple times. When running, it will show as "activating";
# when not running, it will show as "dead"
RemainAfterExit=no

0 comments on commit 1b905ac

Please sign in to comment.