-
Notifications
You must be signed in to change notification settings - Fork 14
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
karabo-bridge-serve-run command #458
Changes from all commits
a0f059c
e97f0cb
2d4f64d
eae9935
0e3d0df
a67217f
cda0b9c
73208fc
10f145a
4554836
505aa3b
a46793d
f15401c
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,90 @@ | ||
from argparse import ArgumentParser | ||
import sys | ||
|
||
from .. import open_run | ||
|
||
IMPORT_FAILED_MSG = """\ | ||
{} | ||
|
||
karabo-bridge-serve-run requires additional dependencies: | ||
pip install karabo-bridge psutil | ||
""" | ||
|
||
def main(argv=None): | ||
ap = ArgumentParser(prog="karabo-bridge-serve-run") | ||
ap.add_argument("proposal", help="Proposal number") | ||
ap.add_argument("run", help="Run number") | ||
ap.add_argument( | ||
"--port", default="0", help="TCP port or ZMQ endpoint to send data on. " | ||
"Selects a random TCP port by default.") | ||
ap.add_argument( | ||
"--include", help="Select matching sources (and optionally keys) to " | ||
"include in streamed data", | ||
action='append' | ||
) | ||
ap.add_argument( | ||
"--allow-partial", help="Send trains where some sources are missing", | ||
action='store_true' | ||
) | ||
ap.add_argument( | ||
"--append-detector-modules", help="combine multiple module sources" | ||
" into one (will only work for AGIPD data currently).", | ||
action='store_true' | ||
) | ||
ap.add_argument( | ||
"--dummy-timestamps", help="create dummy timestamps if the meta-data" | ||
" lacks proper timestamps", | ||
action='store_true' | ||
) | ||
ap.add_argument( | ||
"--use-infiniband", help="Use infiniband interface if available " | ||
"(if a TCP port is specified)", | ||
action='store_true' | ||
) | ||
ap.add_argument( | ||
"-z", "--socket-type", help="ZeroMQ socket type", | ||
choices=['PUB', 'PUSH', 'REP'], default='REP' | ||
) | ||
args = ap.parse_args(argv) | ||
|
||
try: | ||
from ..export import serve_data | ||
except ImportError as e: | ||
sys.exit(IMPORT_FAILED_MSG.format(e)) | ||
|
||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. What is the reason to do this import here within the function and not at the top of the file? (except of course the fail message constant needs to be defined first) There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'd usually try to avoid side-effects (like We could still have the import at the top like this: # Top of file
try:
from ..export import serve_data
except ImportError:
serve_data = None
# In the function
if serve_data is None:
sys.exit(msg) But that looks less neat to me. |
||
run = open_run(args.proposal, args.run, data='all') | ||
|
||
if not args.include: | ||
print("Available sources:") | ||
for s in sorted(run.all_sources): | ||
print(f" {s}") | ||
sys.exit("Please select at least one source with --include") | ||
|
||
include = [] | ||
for pat in args.include: | ||
if '[' in pat: | ||
if not pat.endswith(']'): | ||
sys.exit(f"Missing final ] in {pat!r}") | ||
src_pat, key_pat = pat[:-1].split('[', 1) | ||
include.append((src_pat, key_pat)) | ||
else: | ||
# Source pattern only | ||
include.append(pat) | ||
|
||
if args.allow_partial: | ||
sel = run.select(include, require_any=True) | ||
else: | ||
sel = run.select(include, require_all=True) | ||
|
||
try: | ||
serve_data( | ||
sel, args.port, | ||
append_detector_modules=args.append_detector_modules, | ||
dummy_timestamps=args.dummy_timestamps, | ||
use_infiniband=args.use_infiniband, sock=args.socket_type | ||
) | ||
except KeyboardInterrupt: | ||
print('\nStopped.') | ||
|
||
if __name__ == '__main__': | ||
main() | ||
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -11,6 +11,8 @@ | |
""" | ||
|
||
import os.path as osp | ||
import time | ||
from collections import deque | ||
from socket import AF_INET | ||
from warnings import warn | ||
|
||
|
@@ -107,9 +109,7 @@ | |
yield tid, train_data | ||
|
||
|
||
def serve_files(path, port, source_glob='*', key_glob='*', | ||
append_detector_modules=False, dummy_timestamps=False, | ||
use_infiniband=False, sock='REP'): | ||
def serve_files(path, port, source_glob='*', key_glob='*', **kwargs): | ||
"""Stream data from files through a TCP socket. | ||
|
||
Parameters | ||
|
@@ -144,15 +144,58 @@ | |
data = H5File(path) | ||
|
||
data = data.select(source_glob, key_glob) | ||
serve_data(data, port, **kwargs) | ||
|
||
|
||
def serve_data(data, port, append_detector_modules=False, | ||
dummy_timestamps=False, use_infiniband=False, sock='REP'): | ||
"""Stream data from files through a TCP socket. | ||
|
||
Parameters | ||
---------- | ||
data: DataCollection | ||
The data to be streamed; should already have sources & keys selected. | ||
port: str or int | ||
A ZMQ endpoint (e.g. 'tcp://*:44444') or a TCP port to bind the socket | ||
to. Integers or strings of all digits are treated as port numbers. | ||
append_detector_modules: bool | ||
Combine multi-module detector data in a single data source (sources for | ||
individual modules are removed). The last section of the source name is | ||
replaces with 'APPEND', example: | ||
'SPB_DET_AGIPD1M-1/DET/#CH0:xtdf' -> 'SPB_DET_AGIPD1M-1/DET/APPEND' | ||
|
||
Supported detectors: AGIPD, DSSC, LPD | ||
dummy_timestamps: bool | ||
Whether to add mock timestamps if the metadata lacks them. | ||
use_infiniband: bool | ||
Use infiniband interface if available (if port specifies a TCP port) | ||
sock: str | ||
socket type - supported: REP, PUB, PUSH (default REP). | ||
""" | ||
if isinstance(port, int) or port.isdigit(): | ||
endpt = f'tcp://{find_infiniband_ip() if use_infiniband else "*"}:{port}' | ||
else: | ||
endpt = port | ||
|
||
sender = Sender(endpt, sock=sock, dummy_timestamps=dummy_timestamps) | ||
print(f'Streamer started on: {sender.endpoint}') | ||
ntrains = len(data.train_ids) | ||
|
||
sent_times = deque([time.monotonic()], 10) | ||
count = 0 | ||
tid, rate = 0, 0. | ||
def print_update(end='\r'): | ||
print(f'Sent {count}/{ntrains} trains - Train ID {tid} - {rate:.1f} Hz', end=end) | ||
|
||
for tid, data in _iter_trains(data, merge_detector=append_detector_modules): | ||
sender.send(data) | ||
count += 1 | ||
new_time = time.monotonic() | ||
if count % 5 == 0: | ||
rate = len(sent_times) / (new_time - sent_times[0]) | ||
takluyver marked this conversation as resolved.
Show resolved
Hide resolved
|
||
print_update() | ||
sent_times.append(new_time) | ||
print_update(end='\n') | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Factored-out function to allow different line-end behaviour, carriage return vs. line break - fair enough to avoid the longish format string expression twice. |
||
|
||
# The karabo-bridge code sets linger to 0 so that it doesn't get stuck if | ||
# the client goes away. But this would also mean that we close the socket | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
In fact, I have used the old command (only) for streaming from a run directory, using the full path as argument. Apart from the fact that
...-serve-run
is indeed more convenient to achieve this, what would be the main use case for using the old command now?There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The
-serve-files
command makes it easy to stream from a non-standard run directory location, e.g. if we do an experimental correction of a run, we might put it in proposalscratch
. Or it gives you a way to stream fromred
before we've integrated support for that. Or if users transfer run data back to their home institution and want to use EXtra-data there. I think the new-serve-run
command will be better for ~95% of use cases.The biggest reason to retain the old command is compatibility & familiarity, though - don't break what's working for people. 🙂