From 97b973be800f32cf226e76223956e0fa9130d627 Mon Sep 17 00:00:00 2001 From: Quentin Long Date: Wed, 30 Oct 2024 17:13:19 -0700 Subject: [PATCH 01/13] Checkpoint commit --- paasta_tools/api/api.py | 4 + paasta_tools/api/api_docs/oapi.yaml | 36 ++++++ paasta_tools/api/api_docs/swagger.json | 46 ++++++++ paasta_tools/api/views/instance.py | 16 +++ paasta_tools/cli/cli.py | 1 + paasta_tools/cli/cmds/remote_run_2.py | 109 +++++++++++++++++ paasta_tools/paasta_remote_run_2.py | 51 ++++++++ paasta_tools/paastaapi/api/service_api.py | 136 ++++++++++++++++++++++ 8 files changed, 399 insertions(+) create mode 100644 paasta_tools/cli/cmds/remote_run_2.py create mode 100755 paasta_tools/paasta_remote_run_2.py diff --git a/paasta_tools/api/api.py b/paasta_tools/api/api.py index f01a48dac1..a0f20d89c8 100644 --- a/paasta_tools/api/api.py +++ b/paasta_tools/api/api.py @@ -148,6 +148,10 @@ def make_app(global_config=None): "service.instance.tasks.task", "/v1/services/{service}/{instance}/tasks/{task_id}", ) + config.add_route( + "service.instance.remote_run", + "/v1/services/{service}/{instance}/remote_run", + ) config.add_route("service.list", "/v1/services/{service}") config.add_route("services", "/v1/services") config.add_route( diff --git a/paasta_tools/api/api_docs/oapi.yaml b/paasta_tools/api/api_docs/oapi.yaml index 1518b6b65d..a5b4310684 100644 --- a/paasta_tools/api/api_docs/oapi.yaml +++ b/paasta_tools/api/api_docs/oapi.yaml @@ -1627,6 +1627,42 @@ paths: summary: Get mesos task of service_name.instance_name by task_id tags: - service + /services/{service}/{instance}/remote_run: + post: + operationId: remote_run + parameters: + - description: Service name + in: path + name: service + required: true + schema: + type: string + - description: Instance name + in: path + name: instance + required: true + schema: + type: string + - description: Username + in: query + name: user + required: true + schema: + type: string + responses: + "200": + content: + application/json: + schema: + type: string + description: The service is delayed for these possible reasons + "404": + description: Deployment key not found + "500": + description: Failure + summary: Launch a remote-run pod + tags: + - service /version: get: operationId: showVersion diff --git a/paasta_tools/api/api_docs/swagger.json b/paasta_tools/api/api_docs/swagger.json index 9caa3c8cac..7ee0a58740 100644 --- a/paasta_tools/api/api_docs/swagger.json +++ b/paasta_tools/api/api_docs/swagger.json @@ -846,6 +846,52 @@ } ] } + }, + "/services/{service}/{instance}/remote_run": { + "post": { + "responses": { + "200": { + "description": "It worked!", + "schema": { + "type": "string" + } + }, + "404": { + "description": "Deployment key not found" + }, + "500": { + "description": "Failure" + } + }, + "summary": "Do a remote run", + "operationId": "remote_run", + "tags": [ + "service" + ], + "parameters": [ + { + "in": "path", + "description": "Service name", + "name": "service", + "required": true, + "type": "string" + }, + { + "in": "path", + "description": "Instance name", + "name": "instance", + "required": true, + "type": "string" + }, + { + "in": "query", + "description": "Username", + "name": "user", + "required": true, + "type": "string" + } + ] + } } }, "definitions": { diff --git a/paasta_tools/api/views/instance.py b/paasta_tools/api/views/instance.py index 945a1d5187..274fe782af 100644 --- a/paasta_tools/api/views/instance.py +++ b/paasta_tools/api/views/instance.py @@ -31,6 +31,7 @@ import paasta_tools.mesos.exceptions as mesos_exceptions from paasta_tools import paasta_remote_run +from paasta_tools import paasta_remote_run_2 from paasta_tools import tron_tools from paasta_tools.api import settings from paasta_tools.api.views.exception import ApiFailure @@ -385,3 +386,18 @@ def instance_mesh_status(request): raise ApiFailure(error_message, 500) return instance_mesh + + +@view_config( + route_name="service.instance.remote_run", request_method="POST", renderer="json" +) +def remote_run(request): + service = request.swagger_data.get("service") + instance = request.swagger_data.get("instance") + user = request.swagger_data.get("user") + + try: + paasta_remote_run_2.remote_run_start(service, instance, user, settings.cluster) + except Exception: + error_message = traceback.format_exc() + raise ApiFailure(error_message, 500) diff --git a/paasta_tools/cli/cli.py b/paasta_tools/cli/cli.py index 92f9dd222b..22af8a138d 100755 --- a/paasta_tools/cli/cli.py +++ b/paasta_tools/cli/cli.py @@ -118,6 +118,7 @@ def add_subparser(command, subparsers): "pause_service_autoscaler": "pause_service_autoscaler", "push-to-registry": "push_to_registry", "remote-run": "remote_run", + "remote-run-2": "remote_run_2", "rollback": "rollback", "secret": "secret", "security-check": "security_check", diff --git a/paasta_tools/cli/cmds/remote_run_2.py b/paasta_tools/cli/cmds/remote_run_2.py new file mode 100644 index 0000000000..dd380ccaf1 --- /dev/null +++ b/paasta_tools/cli/cmds/remote_run_2.py @@ -0,0 +1,109 @@ +#!/usr/bin/env python +# Copyright 2015-2016 Yelp Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from paasta_tools.api.client import get_paasta_oapi_client +from paasta_tools.cli.utils import get_paasta_oapi_api_clustername +from paasta_tools.cli.utils import lazy_choices_completer +from paasta_tools.utils import list_services +from paasta_tools.utils import load_system_paasta_config +from paasta_tools.utils import PaastaColors +from paasta_tools.utils import SystemPaastaConfig + + +def add_common_args_to_parser(parser): + parser.add_argument( + "-s", + "--service", + help="The name of the service you wish to inspect. Required.", + required=True, + ).completer = lazy_choices_completer(list_services) + parser.add_argument( + "-i", + "--instance", + help=( + "Simulate a docker run for a particular instance of the " + "service, like 'main' or 'canary'. Required." + ), + required=True, + ) + parser.add_argument( + "-c", + "--cluster", + help=( + "The name of the cluster you wish to run your task on. " + "If omitted, uses the default cluster defined in the paasta " + f"remote-run configs." + ), + ) + + +def add_subparser( + subparsers, +) -> None: + remote_run_parser = subparsers.add_parser( + "remote-run-2", + help="Run stuff remotely.", + description=("'paasta remote-run' runs stuff remotely "), + ) + add_common_args_to_parser(remote_run_parser) + remote_run_parser.set_defaults(command=remote_run) + + +def paasta_remote_run( + cluster: str, + service: str, + instance: str, + system_paasta_config: SystemPaastaConfig, + verbose: int, + is_eks: bool = False, +) -> int: + output = [] + ret_code = 0 + client = get_paasta_oapi_client( + cluster=get_paasta_oapi_api_clustername(cluster=cluster, is_eks=is_eks), + system_paasta_config=system_paasta_config, + ) + if not client: + print("Cannot get a paasta-api client") + exit(1) + + try: + response = client.service.remote_run( + service=service, instance=instance, user="qlo" + ) + print(response) + except client.api_error as exc: + output.append(PaastaColors.red(exc.reason)) + ret_code = exc.status + except (client.connection_error, client.timeout_error) as exc: + output.append( + PaastaColors.red(f"Could not connect to API: {exc.__class__.__name__}") + ) + ret_code = 1 + except Exception as e: + output.append(PaastaColors.red(f"Exception when talking to the API:")) + output.append(str(e)) + ret_code = 1 + + print("\n".join(output)) + + return ret_code + + +def remote_run(args) -> int: + """Run stuff, but remotely!""" + system_paasta_config = load_system_paasta_config() + paasta_remote_run( + args.cluster, args.service, args.instance, system_paasta_config, 1, False + ) diff --git a/paasta_tools/paasta_remote_run_2.py b/paasta_tools/paasta_remote_run_2.py new file mode 100755 index 0000000000..ed60a73aa7 --- /dev/null +++ b/paasta_tools/paasta_remote_run_2.py @@ -0,0 +1,51 @@ +from time import sleep + +from paasta_tools.kubernetes.application.controller_wrappers import ( + get_application_wrapper, +) +from paasta_tools.kubernetes_tools import KubeClient +from paasta_tools.kubernetes_tools import load_kubernetes_service_config_no_cache +from paasta_tools.utils import DEFAULT_SOA_DIR + + +def remote_run_start(service, instance, user, cluster): + kube_client = KubeClient() + is_eks = False + deployment = load_kubernetes_service_config_no_cache( + service, instance, cluster, DEFAULT_SOA_DIR + ) + namespace = deployment.get_namespace() + + formatted_application = deployment.format_kubernetes_app() + formatted_application.metadata.name += f"-remote-run-{user}" + pod_name = formatted_application.metadata.name + app_wrapper = get_application_wrapper(formatted_application) + app_wrapper.load_local_config(DEFAULT_SOA_DIR, cluster, is_eks) + app_wrapper.create(kube_client) + + # Get pod status and name + for retry in range(5): + pod_list = kube_client.core.list_namespaced_pod(namespace) + matching_pod = None + for pod in pod_list.items: + if pod.metadata.name.startswith(pod_name): + matching_pod = pod + break + + if not matching_pod: + sleep(1) + continue + + if pod.status.phase == "Running": + break + elif pod.status.phase != "Initializing": + raise Exception(f"Pod state is {pod.status.phase}") + + if not matching_pod: + raise Exception("No matching pod") + + return {"Status": "Success!", "pod_name": pod.metadata.name, "namespace": namespace} + + +def remote_run_stop(): + pass diff --git a/paasta_tools/paastaapi/api/service_api.py b/paasta_tools/paastaapi/api/service_api.py index 63901f7cee..7db7aec9fe 100644 --- a/paasta_tools/paastaapi/api/service_api.py +++ b/paasta_tools/paastaapi/api/service_api.py @@ -1299,6 +1299,142 @@ def __mesh_instance( callable=__mesh_instance ) + def __remote_run( + self, + service, + instance, + user, + **kwargs + ): + """Launch a remote-run pod # noqa: E501 + + This method makes a synchronous HTTP request by default. To make an + asynchronous HTTP request, please pass async_req=True + + >>> thread = api.remote_run(service, instance, user, async_req=True) + >>> result = thread.get() + + Args: + service (str): Service name + instance (str): Instance name + user (str): Username + + Keyword Args: + _return_http_data_only (bool): response data without head status + code and headers. Default is True. + _preload_content (bool): if False, the urllib3.HTTPResponse object + will be returned without reading/decoding response data. + Default is True. + _request_timeout (float/tuple): timeout setting for this request. If one + number provided, it will be total request timeout. It can also + be a pair (tuple) of (connection, read) timeouts. + Default is None. + _check_input_type (bool): specifies if type checking + should be done one the data sent to the server. + Default is True. + _check_return_type (bool): specifies if type checking + should be done one the data received from the server. + Default is True. + _host_index (int/None): specifies the index of the server + that we want to use. + Default is read from the configuration. + async_req (bool): execute request asynchronously + + Returns: + str + If the method is called asynchronously, returns the request + thread. + """ + kwargs['async_req'] = kwargs.get( + 'async_req', False + ) + kwargs['_return_http_data_only'] = kwargs.get( + '_return_http_data_only', True + ) + kwargs['_preload_content'] = kwargs.get( + '_preload_content', True + ) + kwargs['_request_timeout'] = kwargs.get( + '_request_timeout', None + ) + kwargs['_check_input_type'] = kwargs.get( + '_check_input_type', True + ) + kwargs['_check_return_type'] = kwargs.get( + '_check_return_type', True + ) + kwargs['_host_index'] = kwargs.get('_host_index') + kwargs['service'] = \ + service + kwargs['instance'] = \ + instance + kwargs['user'] = \ + user + return self.call_with_http_info(**kwargs) + + self.remote_run = Endpoint( + settings={ + 'response_type': (str,), + 'auth': [], + 'endpoint_path': '/services/{service}/{instance}/remote_run', + 'operation_id': 'remote_run', + 'http_method': 'POST', + 'servers': None, + }, + params_map={ + 'all': [ + 'service', + 'instance', + 'user', + ], + 'required': [ + 'service', + 'instance', + 'user', + ], + 'nullable': [ + ], + 'enum': [ + ], + 'validation': [ + ] + }, + root_map={ + 'validations': { + }, + 'allowed_values': { + }, + 'openapi_types': { + 'service': + (str,), + 'instance': + (str,), + 'user': + (str,), + }, + 'attribute_map': { + 'service': 'service', + 'instance': 'instance', + 'user': 'user', + }, + 'location_map': { + 'service': 'path', + 'instance': 'path', + 'user': 'query', + }, + 'collection_format_map': { + } + }, + headers_map={ + 'accept': [ + 'application/json' + ], + 'content_type': [], + }, + api_client=api_client, + callable=__remote_run + ) + def __status_instance( self, service, From 3e049e10bf64bff087da0ce816d8de8653ad4b40 Mon Sep 17 00:00:00 2001 From: Matteo Piano Date: Fri, 1 Nov 2024 07:53:21 -0700 Subject: [PATCH 02/13] auth support for paasta APIs --- paasta_tools/api/api.py | 19 +++++ paasta_tools/api/tweens/__init__.py | 6 ++ paasta_tools/api/tweens/auth.py | 113 ++++++++++++++++++++++++++++ requirements.txt | 2 +- tests/api/tweens/test_auth.py | 95 +++++++++++++++++++++++ 5 files changed, 234 insertions(+), 1 deletion(-) create mode 100644 paasta_tools/api/tweens/auth.py create mode 100644 tests/api/tweens/test_auth.py diff --git a/paasta_tools/api/api.py b/paasta_tools/api/api.py index f01a48dac1..023a4f8e62 100644 --- a/paasta_tools/api/api.py +++ b/paasta_tools/api/api.py @@ -31,6 +31,7 @@ import paasta_tools.api from paasta_tools import kubernetes_tools from paasta_tools.api import settings +from paasta_tools.api.tweens import auth from paasta_tools.api.tweens import profiling from paasta_tools.api.tweens import request_logger from paasta_tools.utils import load_system_paasta_config @@ -79,6 +80,18 @@ def parse_paasta_api_args(): default=4, help="Number of gunicorn workers to run", ) + parser.add_argument( + "--auth-endpoint", + type=str, + default="", + help="External API authorization endpoint", + ) + parser.add_argument( + "--auth-enforce", + action="store_true", + default=False, + help="Enforce API authorization", + ) args = parser.parse_args() return args @@ -105,6 +118,7 @@ def make_app(global_config=None): config.include("pyramid_swagger") config.include(request_logger) + config.include(auth) config.add_route( "flink.service.instance.jobs", "/v1/flink/{service}/{instance}/jobs" @@ -257,6 +271,11 @@ def main(argv=None): if args.cluster: os.environ["PAASTA_API_CLUSTER"] = args.cluster + if args.auth_endpoint: + os.environ["PAASTA_API_AUTH_ENDPOINT"] = args.auth_endpoint + if args.auth_enforce: + os.environ["PAASTA_API_AUTH_ENFORCE"] = "1" + gunicorn_args = [ "gunicorn", "-w", diff --git a/paasta_tools/api/tweens/__init__.py b/paasta_tools/api/tweens/__init__.py index e69de29bb2..e8d08dd58d 100644 --- a/paasta_tools/api/tweens/__init__.py +++ b/paasta_tools/api/tweens/__init__.py @@ -0,0 +1,6 @@ +from typing import Callable + +from pyramid.request import Request +from pyramid.response import Response + +Handler = Callable[[Request], Response] diff --git a/paasta_tools/api/tweens/auth.py b/paasta_tools/api/tweens/auth.py new file mode 100644 index 0000000000..2e2ff7ef3d --- /dev/null +++ b/paasta_tools/api/tweens/auth.py @@ -0,0 +1,113 @@ +#!/usr/bin/env python +# Copyright 2015-2016 Yelp Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import json +import logging +import os +from typing import NamedTuple + +import cachetools.func +import pyramid +import requests +from pyramid.config import Configurator +from pyramid.httpexceptions import HTTPForbidden +from pyramid.registry import Registry +from pyramid.request import Request +from pyramid.response import Response + +from paasta_tools.api.tweens import Handler + + +logger = logging.getLogger(__name__) +AUTH_CACHE_SIZE = 50000 +AUTH_CACHE_TTL = 30 * 60 + + +class AuthorizationOutcome(NamedTuple): + authorized: bool + reason: str + + +class AuthTweenFactory: + def __init__(self, handler: Handler, registry: Registry) -> None: + self.handler = handler + self.registry = registry + self.enforce = bool(os.getenv("PAASTA_API_AUTH_ENFORCE", "")) + self.endpoint = os.getenv("PAASTA_API_AUTH_ENDPOINT") + self.session = requests.Session() + + def __call__(self, request: Request) -> Response: + """ + Extracts relevant metadata from request, and checks if it is authorized + """ + token = request.headers.get("Authorization", "").strip() + token = token.split()[-1] if token else "" # removes "Bearer" prefix + auth_outcome = self.is_request_authorized(request.path, token, request.method) + if self.enforce and not auth_outcome.authorized: + return HTTPForbidden( + body=json.dumps({"reason": auth_outcome.reason}), + headers={"X-Auth-Failure-Reason": auth_outcome.reason}, + content_type="application/json", + charset="utf-8", + ) + return self.handler(request) + + @cachetools.func.ttl_cache(maxsize=AUTH_CACHE_SIZE, ttl=AUTH_CACHE_TTL) + def is_request_authorized( + self, path: str, token: str, method: str + ) -> AuthorizationOutcome: + """Check if API request is authorized + + :param str path: API path + :param str token: authentication token + :param str method: http method + :return: auth outcome + """ + try: + response = self.session.post( + url=self.endpoint, + json={ + "input": { + "path": path, + "backend": "paasta", + "token": token, + "method": method, + }, + }, + timeout=2, + ).json() + except Exception as e: + logger.exception(f"Issue communicating with auth endpoint: {e}") + return AuthorizationOutcome(False, "Auth backend error") + + if "result" not in response or "allowed" not in response["result"]: + return AuthorizationOutcome(False, "Malformed auth response") + + if not response["result"]["allowed"]: + reason = response["result"].get("reason", "Denied") + return AuthorizationOutcome(False, reason) + + reason = response["result"].get("reason", "Ok") + return AuthorizationOutcome(True, reason) + + +def includeme(config: Configurator): + if os.getenv("PAASTA_API_AUTH_ENDPOINT"): + config.add_tween( + "paasta_tools.api.tweens.auth.AuthTweenFactory", + under=( + pyramid.tweens.INGRESS, + "paasta_tools.api.tweens.request_logger.request_logger_tween_factory", + ), + ) diff --git a/requirements.txt b/requirements.txt index 9887bf9df6..f8c1a05e93 100644 --- a/requirements.txt +++ b/requirements.txt @@ -13,7 +13,7 @@ boto3-type-annotations==0.3.1 botocore==1.34.22 bravado==10.4.1 bravado-core==5.12.1 -cachetools==2.0.1 +cachetools==5.5.0 certifi==2017.11.5 chardet==3.0.4 choice==0.1 diff --git a/tests/api/tweens/test_auth.py b/tests/api/tweens/test_auth.py new file mode 100644 index 0000000000..21ad97bafd --- /dev/null +++ b/tests/api/tweens/test_auth.py @@ -0,0 +1,95 @@ +# Copyright 2015-2016 Yelp Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import os +from unittest.mock import MagicMock +from unittest.mock import patch + +import pytest +from pyramid.httpexceptions import HTTPForbidden + +from paasta_tools.api.tweens import auth + + +@pytest.fixture +def mock_auth_tween(): + with patch.dict( + os.environ, + { + "PAASTA_API_AUTH_ENFORCE": "1", + "PAASTA_API_AUTH_ENDPOINT": "http://localhost:31337", + }, + ): + with patch("paasta_tools.api.tweens.auth.requests"): + yield auth.AuthTweenFactory(MagicMock(), MagicMock()) + + +def test_call(mock_auth_tween): + mock_request = MagicMock( + path="/something", + method="post", + headers={"Authorization": "Bearer aaa.bbb.ccc"}, + ) + with patch.object(mock_auth_tween, "is_request_authorized") as mock_is_authorized: + mock_is_authorized.return_value = auth.AuthorizationOutcome(True, "Ok") + mock_auth_tween(mock_request) + mock_is_authorized.assert_called_once_with("/something", "aaa.bbb.ccc", "post") + mock_auth_tween.handler.assert_called_once_with(mock_request) + + +def test_call_deny(mock_auth_tween): + mock_request = MagicMock( + path="/something", + method="post", + headers={"Authorization": "Bearer aaa.bbb.ccc"}, + ) + with patch.object(mock_auth_tween, "is_request_authorized") as mock_is_authorized: + mock_is_authorized.return_value = auth.AuthorizationOutcome(False, "Denied") + response = mock_auth_tween(mock_request) + assert isinstance(response, HTTPForbidden) + assert response.headers.get("X-Auth-Failure-Reason") == "Denied" + + +def test_is_request_authorized(mock_auth_tween): + mock_auth_tween.session.post.return_value.json.return_value = { + "result": {"allowed": True, "reason": "User allowed"} + } + assert mock_auth_tween.is_request_authorized( + "/allowed", "aaa.bbb.ccc", "get" + ) == auth.AuthorizationOutcome(True, "User allowed") + mock_auth_tween.session.post.assert_called_once_with( + url="http://localhost:31337", + json={ + "input": { + "path": "/allowed", + "backend": "paasta", + "token": "aaa.bbb.ccc", + "method": "get", + } + }, + timeout=2, + ) + + +def test_is_request_authorized_fail(mock_auth_tween): + mock_auth_tween.session.post.side_effect = Exception + assert mock_auth_tween.is_request_authorized( + "/allowed", "eee.ddd.fff", "get" + ) == auth.AuthorizationOutcome(False, "Auth backend error") + + +def test_is_request_authorized_malformed(mock_auth_tween): + mock_auth_tween.session.post.return_value.json.return_value = {"foo": "bar"} + assert mock_auth_tween.is_request_authorized( + "/allowed", "eee.ddd.fff", "post" + ) == auth.AuthorizationOutcome(False, "Malformed auth response") From d99aaf1c69901b7ea82a21bdcda7218aede846ac Mon Sep 17 00:00:00 2001 From: Matteo Piano Date: Fri, 1 Nov 2024 09:18:07 -0700 Subject: [PATCH 03/13] allow passing auth token to API client --- paasta_tools/api/client.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/paasta_tools/api/client.py b/paasta_tools/api/client.py index fc82dcc15c..7cde15d7df 100644 --- a/paasta_tools/api/client.py +++ b/paasta_tools/api/client.py @@ -48,6 +48,7 @@ def get_paasta_oapi_client_by_url( cert_file: Optional[str] = None, key_file: Optional[str] = None, ssl_ca_cert: Optional[str] = None, + auth_token: str = "", ) -> PaastaOApiClient: server_variables = dict(scheme=parsed_url.scheme, host=parsed_url.netloc) config = paastaapi.Configuration( @@ -63,6 +64,9 @@ def get_paasta_oapi_client_by_url( client.rest_client.pool_manager.connection_pool_kw[ "timeout" ] = load_system_paasta_config().get_api_client_timeout() + # SEC-19555: support auth in PaaSTA APIs + if auth_token: + client.set_default_header("Authorization", f"Bearer {auth_token}") return PaastaOApiClient( autoscaler=paastaapis.AutoscalerApi(client), default=paastaapis.DefaultApi(client), @@ -79,6 +83,7 @@ def get_paasta_oapi_client( cluster: str = None, system_paasta_config: SystemPaastaConfig = None, http_res: bool = False, + auth_token: str = "", ) -> Optional[PaastaOApiClient]: if not system_paasta_config: system_paasta_config = load_system_paasta_config() @@ -94,4 +99,6 @@ def get_paasta_oapi_client( parsed = urlparse(api_endpoints[cluster]) cert_file = key_file = ssl_ca_cert = None - return get_paasta_oapi_client_by_url(parsed, cert_file, key_file, ssl_ca_cert) + return get_paasta_oapi_client_by_url( + parsed, cert_file, key_file, ssl_ca_cert, auth_token + ) From 18a72fb8dc80b8296062235bee1aaee11a0d3360 Mon Sep 17 00:00:00 2001 From: Quentin Long Date: Fri, 1 Nov 2024 17:24:21 -0700 Subject: [PATCH 04/13] More remote_run stuff --- paasta_tools/api/api_docs/oapi.yaml | 22 ++++-- paasta_tools/api/views/instance.py | 8 ++- paasta_tools/cli/cmds/remote_run_2.py | 81 +++++++++++++++++++++-- paasta_tools/paasta_remote_run_2.py | 80 +++++++++++++++++----- paasta_tools/paastaapi/api/service_api.py | 26 ++++---- paasta_tools/paastaapi/models/__init__.py | 1 + 6 files changed, 179 insertions(+), 39 deletions(-) diff --git a/paasta_tools/api/api_docs/oapi.yaml b/paasta_tools/api/api_docs/oapi.yaml index a5b4310684..400f627735 100644 --- a/paasta_tools/api/api_docs/oapi.yaml +++ b/paasta_tools/api/api_docs/oapi.yaml @@ -1630,6 +1630,22 @@ paths: /services/{service}/{instance}/remote_run: post: operationId: remote_run + requestBody: + content: + application/json: + schema: + type: object + properties: + interactive: + type: bool + user: + type: string + image: + type: string + required: + - interactive + - user + required: true parameters: - description: Service name in: path @@ -1643,12 +1659,6 @@ paths: required: true schema: type: string - - description: Username - in: query - name: user - required: true - schema: - type: string responses: "200": content: diff --git a/paasta_tools/api/views/instance.py b/paasta_tools/api/views/instance.py index 274fe782af..4c4e161bb6 100644 --- a/paasta_tools/api/views/instance.py +++ b/paasta_tools/api/views/instance.py @@ -395,9 +395,15 @@ def remote_run(request): service = request.swagger_data.get("service") instance = request.swagger_data.get("instance") user = request.swagger_data.get("user") + interactive = request.swagger_data.get("interactive", True) + recreate = request.swagger_data.get("recreate", True) try: - paasta_remote_run_2.remote_run_start(service, instance, user, settings.cluster) + response = paasta_remote_run_2.remote_run_start( + service, instance, user, settings.cluster, interactive, recreate + ) except Exception: error_message = traceback.format_exc() raise ApiFailure(error_message, 500) + + return response diff --git a/paasta_tools/cli/cmds/remote_run_2.py b/paasta_tools/cli/cmds/remote_run_2.py index dd380ccaf1..5896a5aedb 100644 --- a/paasta_tools/cli/cmds/remote_run_2.py +++ b/paasta_tools/cli/cmds/remote_run_2.py @@ -12,9 +12,19 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +import json +import os +import subprocess +import sys +import time + from paasta_tools.api.client import get_paasta_oapi_client +from paasta_tools.cli.cmds.check import makefile_responds_to +from paasta_tools.cli.cmds.cook_image import paasta_cook_image from paasta_tools.cli.utils import get_paasta_oapi_api_clustername from paasta_tools.cli.utils import lazy_choices_completer +from paasta_tools.utils import DEFAULT_SOA_DIR +from paasta_tools.utils import get_username from paasta_tools.utils import list_services from paasta_tools.utils import load_system_paasta_config from paasta_tools.utils import PaastaColors @@ -56,6 +66,31 @@ def add_subparser( help="Run stuff remotely.", description=("'paasta remote-run' runs stuff remotely "), ) + remote_run_parser.add_argument( + "-b", + "--build", + dest="build", + help="Build the image from current directory", + action="store_true", + ) + remote_run_parser.add_argument( + "-y", + "--yelpsoa-config-root", + dest="yelpsoa_config_root", + help="A directory from which yelpsoa-configs should be read from", + default=DEFAULT_SOA_DIR, + ) + remote_run_parser.add_argument( + "-I", + "--interactive", + help=( + 'Run container in interactive mode. If interactive is set the default command will be "bash" ' + 'unless otherwise set by the "--cmd" flag' + ), + action="store_true", + required=False, + default=False, + ) add_common_args_to_parser(remote_run_parser) remote_run_parser.set_defaults(command=remote_run) @@ -67,9 +102,23 @@ def paasta_remote_run( system_paasta_config: SystemPaastaConfig, verbose: int, is_eks: bool = False, + build: bool = False, ) -> int: + output = [] ret_code = 0 + + # TODO: Build + if build and not makefile_responds_to("cook-image"): + print( + "A local Makefile with a 'cook-image' target is required for --build", + file=sys.stderr, + ) + default_tag = "paasta-remote-run-{}-{}".format(service, get_username()) + os.environ["DOCKER_TAG"] = default_tag + paasta_cook_image(args=None, service=service, soa_dir=soa_dir) + # TODO Actually push the image + client = get_paasta_oapi_client( cluster=get_paasta_oapi_api_clustername(cluster=cluster, is_eks=is_eks), system_paasta_config=system_paasta_config, @@ -79,11 +128,16 @@ def paasta_remote_run( exit(1) try: + # TODO add image argument if build response = client.service.remote_run( - service=service, instance=instance, user="qlo" + service=service, + instance=instance, + user=get_username(), ) - print(response) + print("Reponse was: ", response) + response = json.loads(response) except client.api_error as exc: + print(exc, file=sys.stderr) output.append(PaastaColors.red(exc.reason)) ret_code = exc.status except (client.connection_error, client.timeout_error) as exc: @@ -96,14 +150,31 @@ def paasta_remote_run( output.append(str(e)) ret_code = 1 - print("\n".join(output)) + if ret_code: + print("\n".join(output)) + return ret_code + + pod_name, namespace = response["pod_name"], response["namespace"] + exec_command_tmpl = "kubectl{eks}-{cluster} exec -it -n {namespace} {pod} /bin/bash" + exec_command = exec_command_tmpl.format( + eks="-eks" if is_eks else "", cluster=cluster, namespace=namespace, pod=pod_name + ) + print("Pod launched successfully:", pod_name) + + # TODO figure out how to get this to work + # print('Attaching shell') + # cmd = subprocess.Popen(exec_command.split(' ')) + print("Run the following command to enter your service pod") + print(exec_command) return ret_code def remote_run(args) -> int: """Run stuff, but remotely!""" - system_paasta_config = load_system_paasta_config() - paasta_remote_run( + system_paasta_config = load_system_paasta_config( + "/nail/home/qlo/paasta_config/paasta/" + ) + return paasta_remote_run( args.cluster, args.service, args.instance, system_paasta_config, 1, False ) diff --git a/paasta_tools/paasta_remote_run_2.py b/paasta_tools/paasta_remote_run_2.py index ed60a73aa7..0c60d70d0f 100755 --- a/paasta_tools/paasta_remote_run_2.py +++ b/paasta_tools/paasta_remote_run_2.py @@ -1,5 +1,8 @@ +import json from time import sleep +from kubernetes.client.exceptions import ApiException + from paasta_tools.kubernetes.application.controller_wrappers import ( get_application_wrapper, ) @@ -8,44 +11,91 @@ from paasta_tools.utils import DEFAULT_SOA_DIR -def remote_run_start(service, instance, user, cluster): - kube_client = KubeClient() +def remote_run_start(service, instance, user, cluster, interactive, recreate): + # TODO Overriding the kube client config for now as the api has limited permissions + kube_client = KubeClient(config_file="/etc/kubernetes/admin.conf") + + # TODO hardcoded for now is_eks = False + + # Load the service deployment settings deployment = load_kubernetes_service_config_no_cache( service, instance, cluster, DEFAULT_SOA_DIR ) namespace = deployment.get_namespace() + # Set to interactive mode + if interactive: + deployment.config_dict["cmd"] = "sleep 604800" # One week + + # Create the app with a new name formatted_application = deployment.format_kubernetes_app() formatted_application.metadata.name += f"-remote-run-{user}" - pod_name = formatted_application.metadata.name + deployment_name = formatted_application.metadata.name app_wrapper = get_application_wrapper(formatted_application) app_wrapper.load_local_config(DEFAULT_SOA_DIR, cluster, is_eks) - app_wrapper.create(kube_client) + # Launch pod + try: + app_wrapper.create(kube_client) + except ApiException as e: + if e.status == 409: + # Deployment already running + if recreate: + app_wrapper.deep_delete(kube_client) + wait_until_deployment_gone(kube_client, namespace, deployment_name) + app_wrapper.create(kube_client) + else: + raise + + pod = wait_until_pod_running(kube_client, namespace, deployment_name) + + return json.dumps( + {"status": "success", "pod_name": pod.metadata.name, "namespace": namespace} + ) + + +def wait_until_deployment_gone(kube_client, namespace, deployment_name): + for retry in range(10): + pod = find_pod(kube_client, namespace, deployment_name, 1) + if not pod: + return + sleep(5) + raise Exception("Pod still exists!") + + +def find_pod(kube_client, namespace, deployment_name, retries=5): # Get pod status and name - for retry in range(5): + for retry in range(retries): pod_list = kube_client.core.list_namespaced_pod(namespace) matching_pod = None for pod in pod_list.items: - if pod.metadata.name.startswith(pod_name): + if pod.metadata.name.startswith(deployment_name): matching_pod = pod break if not matching_pod: - sleep(1) + sleep(2) continue + return matching_pod + return None + +def wait_until_pod_running(kube_client, namespace, deployment_name): + for retry in range(5): + pod = find_pod(kube_client, namespace, deployment_name) + if not pod: + raise Exception("No matching pod!") if pod.status.phase == "Running": break - elif pod.status.phase != "Initializing": + elif pod.status.phase not in ("Initializing", "Pending"): raise Exception(f"Pod state is {pod.status.phase}") - - if not matching_pod: - raise Exception("No matching pod") - - return {"Status": "Success!", "pod_name": pod.metadata.name, "namespace": namespace} + return pod -def remote_run_stop(): - pass +# def remote_run_stop(): +# TODO Should this happen here or should the client kill the deployment directly? +# Load the service deployment settings +# deployment = load_kubernetes_service_config_no_cache( +# service, instance, cluster, DEFAULT_SOA_DIR +# ) diff --git a/paasta_tools/paastaapi/api/service_api.py b/paasta_tools/paastaapi/api/service_api.py index 7db7aec9fe..8050e75d2b 100644 --- a/paasta_tools/paastaapi/api/service_api.py +++ b/paasta_tools/paastaapi/api/service_api.py @@ -27,6 +27,7 @@ from paasta_tools.paastaapi.model.flink_config import FlinkConfig from paasta_tools.paastaapi.model.flink_job_details import FlinkJobDetails from paasta_tools.paastaapi.model.flink_jobs import FlinkJobs +from paasta_tools.paastaapi.model.inline_object1 import InlineObject1 from paasta_tools.paastaapi.model.inline_response200 import InlineResponse200 from paasta_tools.paastaapi.model.inline_response2001 import InlineResponse2001 from paasta_tools.paastaapi.model.instance_bounce_status import InstanceBounceStatus @@ -1303,7 +1304,7 @@ def __remote_run( self, service, instance, - user, + inline_object1, **kwargs ): """Launch a remote-run pod # noqa: E501 @@ -1311,13 +1312,13 @@ def __remote_run( This method makes a synchronous HTTP request by default. To make an asynchronous HTTP request, please pass async_req=True - >>> thread = api.remote_run(service, instance, user, async_req=True) + >>> thread = api.remote_run(service, instance, inline_object1, async_req=True) >>> result = thread.get() Args: service (str): Service name instance (str): Instance name - user (str): Username + inline_object1 (InlineObject1): Keyword Args: _return_http_data_only (bool): response data without head status @@ -1368,8 +1369,8 @@ def __remote_run( service kwargs['instance'] = \ instance - kwargs['user'] = \ - user + kwargs['inline_object1'] = \ + inline_object1 return self.call_with_http_info(**kwargs) self.remote_run = Endpoint( @@ -1385,12 +1386,12 @@ def __remote_run( 'all': [ 'service', 'instance', - 'user', + 'inline_object1', ], 'required': [ 'service', 'instance', - 'user', + 'inline_object1', ], 'nullable': [ ], @@ -1409,18 +1410,17 @@ def __remote_run( (str,), 'instance': (str,), - 'user': - (str,), + 'inline_object1': + (InlineObject1,), }, 'attribute_map': { 'service': 'service', 'instance': 'instance', - 'user': 'user', }, 'location_map': { 'service': 'path', 'instance': 'path', - 'user': 'query', + 'inline_object1': 'body', }, 'collection_format_map': { } @@ -1429,7 +1429,9 @@ def __remote_run( 'accept': [ 'application/json' ], - 'content_type': [], + 'content_type': [ + 'application/json' + ] }, api_client=api_client, callable=__remote_run diff --git a/paasta_tools/paastaapi/models/__init__.py b/paasta_tools/paastaapi/models/__init__.py index 0d1278ee23..bda54f7889 100644 --- a/paasta_tools/paastaapi/models/__init__.py +++ b/paasta_tools/paastaapi/models/__init__.py @@ -26,6 +26,7 @@ from paasta_tools.paastaapi.model.float_and_error import FloatAndError from paasta_tools.paastaapi.model.hpa_metric import HPAMetric from paasta_tools.paastaapi.model.inline_object import InlineObject +from paasta_tools.paastaapi.model.inline_object1 import InlineObject1 from paasta_tools.paastaapi.model.inline_response200 import InlineResponse200 from paasta_tools.paastaapi.model.inline_response2001 import InlineResponse2001 from paasta_tools.paastaapi.model.instance_bounce_status import InstanceBounceStatus From c6d4c20fbe0fc8c98d4d038e644287f259982895 Mon Sep 17 00:00:00 2001 From: Quentin Long Date: Tue, 5 Nov 2024 19:54:03 -0800 Subject: [PATCH 05/13] change Deployment to Job --- paasta_tools/api/api.py | 4 +- paasta_tools/api/api_docs/oapi.yaml | 83 ++++++++++- paasta_tools/api/api_docs/swagger.json | 96 +++++++++++- paasta_tools/api/views/instance.py | 16 +- .../application/controller_wrappers.py | 58 +++++++- paasta_tools/kubernetes_tools.py | 87 ++++++++++- paasta_tools/paasta_remote_run_2.py | 45 +++--- paasta_tools/paastaapi/api/service_api.py | 138 ------------------ paasta_tools/paastaapi/apis/__init__.py | 1 + paasta_tools/paastaapi/models/__init__.py | 1 + 10 files changed, 358 insertions(+), 171 deletions(-) diff --git a/paasta_tools/api/api.py b/paasta_tools/api/api.py index a0f20d89c8..8108e78548 100644 --- a/paasta_tools/api/api.py +++ b/paasta_tools/api/api.py @@ -149,8 +149,8 @@ def make_app(global_config=None): "/v1/services/{service}/{instance}/tasks/{task_id}", ) config.add_route( - "service.instance.remote_run", - "/v1/services/{service}/{instance}/remote_run", + "remote_run", + "/v1/remote_run/{service}/{instance}", ) config.add_route("service.list", "/v1/services/{service}") config.add_route("services", "/v1/services") diff --git a/paasta_tools/api/api_docs/oapi.yaml b/paasta_tools/api/api_docs/oapi.yaml index 400f627735..56db1b467b 100644 --- a/paasta_tools/api/api_docs/oapi.yaml +++ b/paasta_tools/api/api_docs/oapi.yaml @@ -1627,9 +1627,9 @@ paths: summary: Get mesos task of service_name.instance_name by task_id tags: - service - /services/{service}/{instance}/remote_run: + /remote_run/{service}/{instance}/start: post: - operationId: remote_run + operationId: remote_run_start requestBody: content: application/json: @@ -1672,7 +1672,84 @@ paths: description: Failure summary: Launch a remote-run pod tags: - - service + - remote_run + /remote_run/{service}/{instance}/stop: + post: + operationId: remote_run_sop + requestBody: + content: + application/json: + schema: + type: object + properties: + user: + type: string + required: + - user + required: true + parameters: + - description: Service name + in: path + name: service + required: true + schema: + type: string + - description: Instance name + in: path + name: instance + required: true + schema: + type: string + responses: + "200": + content: + application/json: + schema: + type: string + description: Remote run pod stopped + "404": + description: Deployment key not found + "500": + description: Failure + summary: Stop a remote-run pod + tags: + - remote_run + /remote_run/{service}/{instance}/token: + get: + operationId: remote_run_token + parameters: + - description: Service name + in: path + name: service + required: true + schema: + type: string + - description: Instance name + in: path + name: instance + required: true + schema: + type: string + - description: User name + in: query + name: user + schema: + type: string + required: true + responses: + "200": + content: + application/json: + schema: + type: string + description: Token generated successfully + "404": + description: Deployment key not found + "500": + description: Failure + summary: Get a short lived token for exec into remote-run pod + tags: + - remote_run /version: get: operationId: showVersion diff --git a/paasta_tools/api/api_docs/swagger.json b/paasta_tools/api/api_docs/swagger.json index 7ee0a58740..4c4b4e42a6 100644 --- a/paasta_tools/api/api_docs/swagger.json +++ b/paasta_tools/api/api_docs/swagger.json @@ -847,7 +847,7 @@ ] } }, - "/services/{service}/{instance}/remote_run": { + "/remote_run/{service}/{instance}/start": { "post": { "responses": { "200": { @@ -864,7 +864,53 @@ } }, "summary": "Do a remote run", - "operationId": "remote_run", + "operationId": "remote_run_start", + "tags": [ + "remote_run" + ], + "parameters": [ + { + "in": "path", + "description": "Service name", + "name": "service", + "required": true, + "type": "string" + }, + { + "in": "path", + "description": "Instance name", + "name": "instance", + "required": true, + "type": "string" + }, + { + "in": "query", + "description": "Username", + "name": "user", + "required": true, + "type": "string" + } + ] + } + }, + "/remote_run/{service}/{instance}/stop": { + "post": { + "responses": { + "200": { + "description": "It worked!", + "schema": { + "type": "string" + } + }, + "404": { + "description": "Deployment key not found" + }, + "500": { + "description": "Failure" + } + }, + "summary": "Stop a remote run", + "operationId": "remote_run_stop", "tags": [ "service" ], @@ -892,6 +938,52 @@ } ] } + }, + "/remote_run/{service}/{instance}/token": { + "get": { + "responses": { + "200": { + "description": "It worked!", + "schema": { + "type": "string" + } + }, + "404": { + "description": "Deployment key not found" + }, + "500": { + "description": "Failure" + } + }, + "summary": "Get a remote run token", + "operationId": "remote_run_token", + "tags": [ + "remote_run" + ], + "parameters": [ + { + "in": "path", + "description": "Service name", + "name": "service", + "required": true, + "type": "string" + }, + { + "in": "path", + "description": "Instance name", + "name": "instance", + "required": true, + "type": "string" + }, + { + "in": "query", + "description": "Username", + "name": "user", + "required": true, + "type": "string" + } + ] + } } }, "definitions": { diff --git a/paasta_tools/api/views/instance.py b/paasta_tools/api/views/instance.py index 4c4e161bb6..224fd8fd4c 100644 --- a/paasta_tools/api/views/instance.py +++ b/paasta_tools/api/views/instance.py @@ -388,10 +388,18 @@ def instance_mesh_status(request): return instance_mesh -@view_config( - route_name="service.instance.remote_run", request_method="POST", renderer="json" -) -def remote_run(request): +@view_config(route_name="remote_run.stop", request_method="POST", renderer="json") +def remote_run_stop(request): + pass + + +@view_config(route_name="remote_run.token", request_method="POST", renderer="json") +def remote_run_token(request): + pass + + +@view_config(route_name="remote_run.start", request_method="POST", renderer="json") +def remote_run_start(request): service = request.swagger_data.get("service") instance = request.swagger_data.get("instance") user = request.swagger_data.get("user") diff --git a/paasta_tools/kubernetes/application/controller_wrappers.py b/paasta_tools/kubernetes/application/controller_wrappers.py index 21026120f8..2069871e69 100644 --- a/paasta_tools/kubernetes/application/controller_wrappers.py +++ b/paasta_tools/kubernetes/application/controller_wrappers.py @@ -7,12 +7,14 @@ from kubernetes.client import V1beta1PodDisruptionBudget from kubernetes.client import V1DeleteOptions from kubernetes.client import V1Deployment +from kubernetes.client import V1Job from kubernetes.client import V1StatefulSet from kubernetes.client.rest import ApiException from paasta_tools.autoscaling.autoscaling_service_lib import autoscaling_is_paused from paasta_tools.eks_tools import load_eks_service_config_no_cache from paasta_tools.kubernetes_tools import create_deployment +from paasta_tools.kubernetes_tools import create_job from paasta_tools.kubernetes_tools import create_pod_disruption_budget from paasta_tools.kubernetes_tools import create_stateful_set from paasta_tools.kubernetes_tools import ensure_service_account @@ -51,7 +53,6 @@ def __init__( "config_sha", ] } - replicas = ( item.spec.replicas if item.metadata.labels.get(paasta_prefixed("autoscaled"), "false") @@ -411,14 +412,67 @@ def update(self, kube_client: KubeClient): ) +class JobWrapper(Application): + def __init__( + self, + item: V1Job, + logging=logging.getLogger(__name__), + ): + item.spec.replicas = None + super().__init__(item, logging) + + def deep_delete(self, kube_client: KubeClient) -> None: + """ + Remove all controllers, pods, and pod disruption budgets related to this application + :param kube_client: + """ + delete_options = V1DeleteOptions(propagation_policy="Foreground") + try: + kube_client.batchess.delete_namespaced_job( + self.item.metadata.name, + self.item.metadata.namespace, + body=delete_options, + ) + except ApiException as e: + if e.status == 404: + # Job does not exist, nothing to delete but + # we can consider this a success. + self.logging.debug( + "not deleting nonexistent job/{} from namespace/{}".format( + self.item.metadata.name, self.item.metadata.namespace + ) + ) + else: + raise + else: + self.logging.info( + "deleted job/{} from namespace/{}".format( + self.item.metadata.name, self.item.metadata.namespace + ) + ) + + def create(self, kube_client: KubeClient): + create_job( + kube_client=kube_client, + formatted_job=self.item, + namespace=self.soa_config.get_namespace(), + ) + + def update(self, kube_client: KubeClient): + # TODO: Is this needed? + pass + + def get_application_wrapper( - formatted_application: Union[V1Deployment, V1StatefulSet] + formatted_application: Union[V1Deployment, V1StatefulSet, V1Job] ) -> Application: app: Application if isinstance(formatted_application, V1Deployment): app = DeploymentWrapper(formatted_application) elif isinstance(formatted_application, V1StatefulSet): app = StatefulSetWrapper(formatted_application) + elif isinstance(formatted_application, V1Job): + app = JobWrapper(formatted_application) else: raise Exception("Unknown kubernetes object to update") diff --git a/paasta_tools/kubernetes_tools.py b/paasta_tools/kubernetes_tools.py index a4a6f43392..51124637d8 100644 --- a/paasta_tools/kubernetes_tools.py +++ b/paasta_tools/kubernetes_tools.py @@ -48,6 +48,7 @@ from humanfriendly import parse_size from kubernetes import client as kube_client from kubernetes import config as kube_config +from kubernetes.client import AuthenticationV1TokenRequest from kubernetes.client import CoreV1Event from kubernetes.client import models from kubernetes.client import V1Affinity @@ -71,6 +72,8 @@ from kubernetes.client import V1ExecAction from kubernetes.client import V1HostPathVolumeSource from kubernetes.client import V1HTTPGetAction +from kubernetes.client import V1Job +from kubernetes.client import V1JobSpec from kubernetes.client import V1KeyToPath from kubernetes.client import V1LabelSelector from kubernetes.client import V1Lifecycle @@ -113,6 +116,7 @@ from kubernetes.client import V1StatefulSetSpec from kubernetes.client import V1Subject from kubernetes.client import V1TCPSocketAction +from kubernetes.client import V1TokenRequestSpec from kubernetes.client import V1TopologySpreadConstraint from kubernetes.client import V1Volume from kubernetes.client import V1VolumeMount @@ -605,6 +609,7 @@ def __init__( self.core = kube_client.CoreV1Api(self.api_client) self.policy = kube_client.PolicyV1beta1Api(self.api_client) self.apiextensions = kube_client.ApiextensionsV1Api(self.api_client) + self.batches = kube_client.BatchV1Api(self.api_client) self.custom = kube_client.CustomObjectsApi(self.api_client) self.autoscaling = kube_client.AutoscalingV2beta2Api(self.api_client) @@ -2029,6 +2034,62 @@ def get_pod_management_policy(self) -> str: """Get sts pod_management_policy from config, default to 'OrderedReady'""" return self.config_dict.get("pod_management_policy", "OrderedReady") + def format_as_kubernetes_job(self) -> V1Job: + """Create the config for launching the deployment as a Job""" + try: + docker_url = self.get_docker_url() + git_sha = get_git_sha_from_dockerurl(docker_url, long=True) + system_paasta_config = load_system_paasta_config() + complete_config = V1Job( + api_version="batch/v1", + kind="Job", + metadata=self.get_kubernetes_metadata(git_sha), + spec=V1JobSpec( + active_deadline_seconds=3600, + selector=V1LabelSelector( + match_labels={ + "paasta.yelp.com/service": self.get_service(), + "paasta.yelp.com/instance": self.get_instance(), + } + ), + template=self.get_pod_template_spec( + git_sha=git_sha, system_paasta_config=system_paasta_config + ), + ), + ) + + prometheus_shard = self.get_prometheus_shard() + if prometheus_shard: + complete_config.metadata.labels[ + "paasta.yelp.com/prometheus_shard" + ] = prometheus_shard + + image_version = self.get_image_version() + if image_version is not None: + complete_config.metadata.labels[ + "paasta.yelp.com/image_version" + ] = image_version + + complete_config.metadata.labels["paasta.yelp.com/remote_run"] = "true" + # DO NOT ADD LABELS AFTER THIS LINE + config_hash = get_config_hash( + self.sanitize_for_config_hash(complete_config), + force_bounce=self.get_force_bounce(), + ) + complete_config.metadata.labels["yelp.com/paasta_config_sha"] = config_hash + complete_config.metadata.labels["paasta.yelp.com/config_sha"] = config_hash + + complete_config.spec.template.metadata.labels[ + "yelp.com/paasta_config_sha" + ] = config_hash + complete_config.spec.template.metadata.labels[ + "paasta.yelp.com/config_sha" + ] = config_hash + except Exception as e: + raise InvalidKubernetesConfig(e, self.get_service(), self.get_instance()) + log.debug("Complete configuration for instance is: %s", complete_config) + return complete_config + def format_kubernetes_app(self) -> Union[V1Deployment, V1StatefulSet]: """Create the configuration that will be passed to the Kubernetes REST API.""" @@ -2517,7 +2578,7 @@ def sanitize_for_config_hash( # remove data we dont want used to hash configs # replica count - if ahash["spec"] is not None: + if ahash["spec"] is not None and "replicas" in ahash["spec"]: del ahash["spec"]["replicas"] if ahash["metadata"] is not None: @@ -3631,6 +3692,16 @@ def create_stateful_set( ) +def create_job( + kube_client: KubeClient, + formatted_job: V1Job, + namespace: str, +) -> None: + return kube_client.batches.create_namespaced_job( + namespace=namespace, body=formatted_job + ) + + def update_stateful_set( kube_client: KubeClient, formatted_stateful_set: V1StatefulSet, @@ -4354,6 +4425,20 @@ def get_kubernetes_secret_env_variables( return decrypted_secrets +def create_temp_exec_token(kube_client: KubeClient, namespace: str, user: str): + """Create a short lived token for exec""" + audience = "remote-run-" + user + service_account = "default" + token_spec = V1TokenRequestSpec( + expiration_seconds=600, audiences=[audience] # minimum allowed by k8s + ) + request = AuthenticationV1TokenRequest(spec=token_spec) + response = kube_client.core.create_namespaced_service_account_token( + service_account, namespace, request + ) + return response + + def get_kubernetes_secret_volumes( kube_client: KubeClient, secret_volumes_config: Sequence[SecretVolume], diff --git a/paasta_tools/paasta_remote_run_2.py b/paasta_tools/paasta_remote_run_2.py index 0c60d70d0f..e2f49caf2a 100755 --- a/paasta_tools/paasta_remote_run_2.py +++ b/paasta_tools/paasta_remote_run_2.py @@ -6,11 +6,22 @@ from paasta_tools.kubernetes.application.controller_wrappers import ( get_application_wrapper, ) +from paasta_tools.kubernetes_tools import create_temp_exec_token from paasta_tools.kubernetes_tools import KubeClient from paasta_tools.kubernetes_tools import load_kubernetes_service_config_no_cache from paasta_tools.utils import DEFAULT_SOA_DIR +def create_exec_token(namespace): + """Creates a short lived token for execing into a pod""" + kube_client = KubeClient(config_file="/etc/kubernetes/admin.conf") + try: + token = create_temp_exec_token(kube_client, namespace, user) + except ApiException as E: + raise + return token["token"] + + def remote_run_start(service, instance, user, cluster, interactive, recreate): # TODO Overriding the kube client config for now as the api has limited permissions kube_client = KubeClient(config_file="/etc/kubernetes/admin.conf") @@ -29,10 +40,10 @@ def remote_run_start(service, instance, user, cluster, interactive, recreate): deployment.config_dict["cmd"] = "sleep 604800" # One week # Create the app with a new name - formatted_application = deployment.format_kubernetes_app() - formatted_application.metadata.name += f"-remote-run-{user}" - deployment_name = formatted_application.metadata.name - app_wrapper = get_application_wrapper(formatted_application) + formatted_job = deployment.format_as_kubernetes_job() + formatted_job.metadata.name = f"remote-run-{user}-{formatted_job.metadata.name}" + job_name = formatted_job.metadata.name + app_wrapper = get_application_wrapper(formatted_job) app_wrapper.load_local_config(DEFAULT_SOA_DIR, cluster, is_eks) # Launch pod @@ -40,37 +51,33 @@ def remote_run_start(service, instance, user, cluster, interactive, recreate): app_wrapper.create(kube_client) except ApiException as e: if e.status == 409: - # Deployment already running - if recreate: - app_wrapper.deep_delete(kube_client) - wait_until_deployment_gone(kube_client, namespace, deployment_name) - app_wrapper.create(kube_client) - else: - raise + # Job already running + return json.dumps({"status": "409", "reason": "Job already running"}) + raise - pod = wait_until_pod_running(kube_client, namespace, deployment_name) + pod = wait_until_pod_running(kube_client, namespace, job_name) return json.dumps( - {"status": "success", "pod_name": pod.metadata.name, "namespace": namespace} + {"status": "200", "pod_name": pod.metadata.name, "namespace": namespace} ) -def wait_until_deployment_gone(kube_client, namespace, deployment_name): +def wait_until_deployment_gone(kube_client, namespace, job_name): for retry in range(10): - pod = find_pod(kube_client, namespace, deployment_name, 1) + pod = find_pod(kube_client, namespace, job_name, 1) if not pod: return sleep(5) raise Exception("Pod still exists!") -def find_pod(kube_client, namespace, deployment_name, retries=5): +def find_pod(kube_client, namespace, job_name, retries=5): # Get pod status and name for retry in range(retries): pod_list = kube_client.core.list_namespaced_pod(namespace) matching_pod = None for pod in pod_list.items: - if pod.metadata.name.startswith(deployment_name): + if pod.metadata.name.startswith(job_name): matching_pod = pod break @@ -81,9 +88,9 @@ def find_pod(kube_client, namespace, deployment_name, retries=5): return None -def wait_until_pod_running(kube_client, namespace, deployment_name): +def wait_until_pod_running(kube_client, namespace, job_name): for retry in range(5): - pod = find_pod(kube_client, namespace, deployment_name) + pod = find_pod(kube_client, namespace, job_name) if not pod: raise Exception("No matching pod!") if pod.status.phase == "Running": diff --git a/paasta_tools/paastaapi/api/service_api.py b/paasta_tools/paastaapi/api/service_api.py index 8050e75d2b..63901f7cee 100644 --- a/paasta_tools/paastaapi/api/service_api.py +++ b/paasta_tools/paastaapi/api/service_api.py @@ -27,7 +27,6 @@ from paasta_tools.paastaapi.model.flink_config import FlinkConfig from paasta_tools.paastaapi.model.flink_job_details import FlinkJobDetails from paasta_tools.paastaapi.model.flink_jobs import FlinkJobs -from paasta_tools.paastaapi.model.inline_object1 import InlineObject1 from paasta_tools.paastaapi.model.inline_response200 import InlineResponse200 from paasta_tools.paastaapi.model.inline_response2001 import InlineResponse2001 from paasta_tools.paastaapi.model.instance_bounce_status import InstanceBounceStatus @@ -1300,143 +1299,6 @@ def __mesh_instance( callable=__mesh_instance ) - def __remote_run( - self, - service, - instance, - inline_object1, - **kwargs - ): - """Launch a remote-run pod # noqa: E501 - - This method makes a synchronous HTTP request by default. To make an - asynchronous HTTP request, please pass async_req=True - - >>> thread = api.remote_run(service, instance, inline_object1, async_req=True) - >>> result = thread.get() - - Args: - service (str): Service name - instance (str): Instance name - inline_object1 (InlineObject1): - - Keyword Args: - _return_http_data_only (bool): response data without head status - code and headers. Default is True. - _preload_content (bool): if False, the urllib3.HTTPResponse object - will be returned without reading/decoding response data. - Default is True. - _request_timeout (float/tuple): timeout setting for this request. If one - number provided, it will be total request timeout. It can also - be a pair (tuple) of (connection, read) timeouts. - Default is None. - _check_input_type (bool): specifies if type checking - should be done one the data sent to the server. - Default is True. - _check_return_type (bool): specifies if type checking - should be done one the data received from the server. - Default is True. - _host_index (int/None): specifies the index of the server - that we want to use. - Default is read from the configuration. - async_req (bool): execute request asynchronously - - Returns: - str - If the method is called asynchronously, returns the request - thread. - """ - kwargs['async_req'] = kwargs.get( - 'async_req', False - ) - kwargs['_return_http_data_only'] = kwargs.get( - '_return_http_data_only', True - ) - kwargs['_preload_content'] = kwargs.get( - '_preload_content', True - ) - kwargs['_request_timeout'] = kwargs.get( - '_request_timeout', None - ) - kwargs['_check_input_type'] = kwargs.get( - '_check_input_type', True - ) - kwargs['_check_return_type'] = kwargs.get( - '_check_return_type', True - ) - kwargs['_host_index'] = kwargs.get('_host_index') - kwargs['service'] = \ - service - kwargs['instance'] = \ - instance - kwargs['inline_object1'] = \ - inline_object1 - return self.call_with_http_info(**kwargs) - - self.remote_run = Endpoint( - settings={ - 'response_type': (str,), - 'auth': [], - 'endpoint_path': '/services/{service}/{instance}/remote_run', - 'operation_id': 'remote_run', - 'http_method': 'POST', - 'servers': None, - }, - params_map={ - 'all': [ - 'service', - 'instance', - 'inline_object1', - ], - 'required': [ - 'service', - 'instance', - 'inline_object1', - ], - 'nullable': [ - ], - 'enum': [ - ], - 'validation': [ - ] - }, - root_map={ - 'validations': { - }, - 'allowed_values': { - }, - 'openapi_types': { - 'service': - (str,), - 'instance': - (str,), - 'inline_object1': - (InlineObject1,), - }, - 'attribute_map': { - 'service': 'service', - 'instance': 'instance', - }, - 'location_map': { - 'service': 'path', - 'instance': 'path', - 'inline_object1': 'body', - }, - 'collection_format_map': { - } - }, - headers_map={ - 'accept': [ - 'application/json' - ], - 'content_type': [ - 'application/json' - ] - }, - api_client=api_client, - callable=__remote_run - ) - def __status_instance( self, service, diff --git a/paasta_tools/paastaapi/apis/__init__.py b/paasta_tools/paastaapi/apis/__init__.py index a93cebf52c..ab815d8c70 100644 --- a/paasta_tools/paastaapi/apis/__init__.py +++ b/paasta_tools/paastaapi/apis/__init__.py @@ -17,5 +17,6 @@ # Import APIs into API package: from paasta_tools.paastaapi.api.autoscaler_api import AutoscalerApi from paasta_tools.paastaapi.api.default_api import DefaultApi +from paasta_tools.paastaapi.api.remote_run_api import RemoteRunApi from paasta_tools.paastaapi.api.resources_api import ResourcesApi from paasta_tools.paastaapi.api.service_api import ServiceApi diff --git a/paasta_tools/paastaapi/models/__init__.py b/paasta_tools/paastaapi/models/__init__.py index bda54f7889..2a8cbf44b2 100644 --- a/paasta_tools/paastaapi/models/__init__.py +++ b/paasta_tools/paastaapi/models/__init__.py @@ -27,6 +27,7 @@ from paasta_tools.paastaapi.model.hpa_metric import HPAMetric from paasta_tools.paastaapi.model.inline_object import InlineObject from paasta_tools.paastaapi.model.inline_object1 import InlineObject1 +from paasta_tools.paastaapi.model.inline_object2 import InlineObject2 from paasta_tools.paastaapi.model.inline_response200 import InlineResponse200 from paasta_tools.paastaapi.model.inline_response2001 import InlineResponse2001 from paasta_tools.paastaapi.model.instance_bounce_status import InstanceBounceStatus From 0b2e0827081c2caf5f0337de0e9e8caa6b3614e7 Mon Sep 17 00:00:00 2001 From: Quentin Long Date: Wed, 6 Nov 2024 16:24:58 -0800 Subject: [PATCH 06/13] Ensure service account --- paasta_tools/api/api.py | 12 +++- paasta_tools/api/api_docs/oapi.yaml | 2 +- .../application/controller_wrappers.py | 6 +- paasta_tools/kubernetes_tools.py | 64 +++++++++++++------ 4 files changed, 55 insertions(+), 29 deletions(-) diff --git a/paasta_tools/api/api.py b/paasta_tools/api/api.py index 8108e78548..0f3ba149a2 100644 --- a/paasta_tools/api/api.py +++ b/paasta_tools/api/api.py @@ -149,8 +149,16 @@ def make_app(global_config=None): "/v1/services/{service}/{instance}/tasks/{task_id}", ) config.add_route( - "remote_run", - "/v1/remote_run/{service}/{instance}", + "remote_run_start", + "/v1/remote_run/{service}/{instance}/start", + ) + config.add_route( + "remote_run_stop", + "/v1/remote_run/{service}/{instance}/stop", + ) + config.add_route( + "remote_run_token", + "/v1/remote_run/{service}/{instance}/token", ) config.add_route("service.list", "/v1/services/{service}") config.add_route("services", "/v1/services") diff --git a/paasta_tools/api/api_docs/oapi.yaml b/paasta_tools/api/api_docs/oapi.yaml index 56db1b467b..b70e434fab 100644 --- a/paasta_tools/api/api_docs/oapi.yaml +++ b/paasta_tools/api/api_docs/oapi.yaml @@ -1675,7 +1675,7 @@ paths: - remote_run /remote_run/{service}/{instance}/stop: post: - operationId: remote_run_sop + operationId: remote_run_stop requestBody: content: application/json: diff --git a/paasta_tools/kubernetes/application/controller_wrappers.py b/paasta_tools/kubernetes/application/controller_wrappers.py index 2069871e69..09d7d90ec4 100644 --- a/paasta_tools/kubernetes/application/controller_wrappers.py +++ b/paasta_tools/kubernetes/application/controller_wrappers.py @@ -428,7 +428,7 @@ def deep_delete(self, kube_client: KubeClient) -> None: """ delete_options = V1DeleteOptions(propagation_policy="Foreground") try: - kube_client.batchess.delete_namespaced_job( + kube_client.batches.delete_namespaced_job( self.item.metadata.name, self.item.metadata.namespace, body=delete_options, @@ -458,10 +458,6 @@ def create(self, kube_client: KubeClient): namespace=self.soa_config.get_namespace(), ) - def update(self, kube_client: KubeClient): - # TODO: Is this needed? - pass - def get_application_wrapper( formatted_application: Union[V1Deployment, V1StatefulSet, V1Job] diff --git a/paasta_tools/kubernetes_tools.py b/paasta_tools/kubernetes_tools.py index 51124637d8..90b97afc4e 100644 --- a/paasta_tools/kubernetes_tools.py +++ b/paasta_tools/kubernetes_tools.py @@ -2046,12 +2046,6 @@ def format_as_kubernetes_job(self) -> V1Job: metadata=self.get_kubernetes_metadata(git_sha), spec=V1JobSpec( active_deadline_seconds=3600, - selector=V1LabelSelector( - match_labels={ - "paasta.yelp.com/service": self.get_service(), - "paasta.yelp.com/instance": self.get_instance(), - } - ), template=self.get_pod_template_spec( git_sha=git_sha, system_paasta_config=system_paasta_config ), @@ -2071,23 +2065,11 @@ def format_as_kubernetes_job(self) -> V1Job: ] = image_version complete_config.metadata.labels["paasta.yelp.com/remote_run"] = "true" - # DO NOT ADD LABELS AFTER THIS LINE - config_hash = get_config_hash( - self.sanitize_for_config_hash(complete_config), - force_bounce=self.get_force_bounce(), - ) - complete_config.metadata.labels["yelp.com/paasta_config_sha"] = config_hash - complete_config.metadata.labels["paasta.yelp.com/config_sha"] = config_hash - - complete_config.spec.template.metadata.labels[ - "yelp.com/paasta_config_sha" - ] = config_hash - complete_config.spec.template.metadata.labels[ - "paasta.yelp.com/config_sha" - ] = config_hash except Exception as e: raise InvalidKubernetesConfig(e, self.get_service(), self.get_instance()) - log.debug("Complete configuration for instance is: %s", complete_config) + log.debug( + "Complete configuration for remote-run instance is: %s", complete_config + ) return complete_config def format_kubernetes_app(self) -> Union[V1Deployment, V1StatefulSet]: @@ -2847,6 +2829,7 @@ def ensure_namespace(kube_client: KubeClient, namespace: str) -> None: else: raise + ensure_paasta_remote_run_service_account(kube_client, namespace) ensure_paasta_api_rolebinding(kube_client, namespace) ensure_paasta_namespace_limits(kube_client, namespace) @@ -2880,6 +2863,45 @@ def ensure_paasta_api_rolebinding(kube_client: KubeClient, namespace: str) -> No ) +def ensure_paasta_remote_run_service_account( + kube_client: KubeClient, namespace: str +) -> None: + """Checks whether a ServiceAccount named paasta-remote-run exists in the namespace, + and if it doesn't, create it and bind paasta-remote-run-role to it.""" + service_accounts = get_all_service_accounts(kube_client, namespace=namespace) + service_account_names = [item.metadata.name for item in service_accounts] + if "paasta-remote-run" not in service_account_names: + log.warning( + f"Creating service account paasta-remote-run in {namespace} namespace as it does not exist" + ) + service_account = V1ServiceAccount( + metadata=V1ObjectMeta(name="paasta-remote-run", namespace=namespace) + ) + role_binding = V1RoleBinding( + metadata=V1ObjectMeta( + name="paasta-remote-run-role", + namespace=namespace, + ), + role_ref=V1RoleRef( + api_group="rbac.authorization.k8s.io", + kind="ClusterRole", + name="paasta-remote-run-role", + ), + subjects=[ + V1Subject( + kind="ServiceAccount", + name="paasta-remote-run", + ), + ], + ) + kube_client.core.create_namespaced_service_account( + namespace=namespace, body=service_account + ) + kube_client.rbac.create_namespaced_role_binding( + namespace=namespace, body=role_binding + ) + + def ensure_paasta_namespace_limits(kube_client: KubeClient, namespace: str) -> None: if not namespace.startswith("paastasvc-"): log.debug( From 7474f8e9842a80eb1101a342fc336b1ce29abfbd Mon Sep 17 00:00:00 2001 From: Quentin Long Date: Wed, 6 Nov 2024 18:46:47 -0800 Subject: [PATCH 07/13] Service account, role binding, token --- paasta_tools/api/api.py | 6 +++--- paasta_tools/api/api_docs/oapi.yaml | 2 +- paasta_tools/api/api_docs/swagger.json | 23 ++++++++++++++++++++--- paasta_tools/api/client.py | 2 ++ paasta_tools/api/views/instance.py | 21 ++++++++++++++++----- paasta_tools/cli/cmds/remote_run_2.py | 19 +++++++++++++------ paasta_tools/kubernetes_tools.py | 13 +++++++++---- paasta_tools/paasta_remote_run_2.py | 9 +++++++-- 8 files changed, 71 insertions(+), 24 deletions(-) diff --git a/paasta_tools/api/api.py b/paasta_tools/api/api.py index 0f3ba149a2..5c2cd2f7f4 100644 --- a/paasta_tools/api/api.py +++ b/paasta_tools/api/api.py @@ -149,15 +149,15 @@ def make_app(global_config=None): "/v1/services/{service}/{instance}/tasks/{task_id}", ) config.add_route( - "remote_run_start", + "remote_run.start", "/v1/remote_run/{service}/{instance}/start", ) config.add_route( - "remote_run_stop", + "remote_run.stop", "/v1/remote_run/{service}/{instance}/stop", ) config.add_route( - "remote_run_token", + "remote_run.token", "/v1/remote_run/{service}/{instance}/token", ) config.add_route("service.list", "/v1/services/{service}") diff --git a/paasta_tools/api/api_docs/oapi.yaml b/paasta_tools/api/api_docs/oapi.yaml index b70e434fab..f3aefc7fec 100644 --- a/paasta_tools/api/api_docs/oapi.yaml +++ b/paasta_tools/api/api_docs/oapi.yaml @@ -1637,7 +1637,7 @@ paths: type: object properties: interactive: - type: bool + type: boolean user: type: string image: diff --git a/paasta_tools/api/api_docs/swagger.json b/paasta_tools/api/api_docs/swagger.json index 4c4b4e42a6..0f5244bdc3 100644 --- a/paasta_tools/api/api_docs/swagger.json +++ b/paasta_tools/api/api_docs/swagger.json @@ -884,11 +884,28 @@ "type": "string" }, { - "in": "query", + "in": "body", "description": "Username", - "name": "user", + "name": "json_body", "required": true, - "type": "string" + "schema": { + "type": "object", + "properties": { + "interactive": { + "type": "boolean" + }, + "user": { + "type": "string" + }, + "image": { + "type": "string" + } + }, + "required": [ + "interactive", + "user" + ] + } } ] } diff --git a/paasta_tools/api/client.py b/paasta_tools/api/client.py index fc82dcc15c..3358d4649f 100644 --- a/paasta_tools/api/client.py +++ b/paasta_tools/api/client.py @@ -37,6 +37,7 @@ class PaastaOApiClient: default: paastaapis.DefaultApi resources: paastaapis.ResourcesApi service: paastaapis.ServiceApi + remote_run: paastaapis.RemoteRunApi api_error: Type[paastaapi.ApiException] connection_error: Type[paastaapi.ApiException] timeout_error: Type[paastaapi.ApiException] @@ -68,6 +69,7 @@ def get_paasta_oapi_client_by_url( default=paastaapis.DefaultApi(client), resources=paastaapis.ResourcesApi(client), service=paastaapis.ServiceApi(client), + remote_run=paastaapis.RemoteRunApi(client), api_error=paastaapi.ApiException, connection_error=paastaapi.ApiException, timeout_error=paastaapi.ApiException, diff --git a/paasta_tools/api/views/instance.py b/paasta_tools/api/views/instance.py index 224fd8fd4c..c9c3b243b6 100644 --- a/paasta_tools/api/views/instance.py +++ b/paasta_tools/api/views/instance.py @@ -16,6 +16,7 @@ PaaSTA service instance status/start/stop etc. """ import asyncio +import json import logging import re import traceback @@ -393,18 +394,28 @@ def remote_run_stop(request): pass -@view_config(route_name="remote_run.token", request_method="POST", renderer="json") +@view_config(route_name="remote_run.token", request_method="GET", renderer="json") def remote_run_token(request): - pass + service = request.swagger_data.get("service") + instance = request.swagger_data.get("instance") + user = request.swagger_data.get("user") + try: + token = paasta_remote_run_2.create_exec_token( + service, instance, user, settings.cluster + ) + except: + error_message = traceback.format_exc() + raise ApiFailure(error_message, 500) + return json.dumps({"token": token}) @view_config(route_name="remote_run.start", request_method="POST", renderer="json") def remote_run_start(request): service = request.swagger_data.get("service") instance = request.swagger_data.get("instance") - user = request.swagger_data.get("user") - interactive = request.swagger_data.get("interactive", True) - recreate = request.swagger_data.get("recreate", True) + user = request.swagger_data["json_body"].get("user") + interactive = request.swagger_data["json_body"].get("interactive", True) + recreate = request.swagger_data["json_body"].get("recreate", True) try: response = paasta_remote_run_2.remote_run_start( diff --git a/paasta_tools/cli/cmds/remote_run_2.py b/paasta_tools/cli/cmds/remote_run_2.py index 5896a5aedb..a2e2965909 100644 --- a/paasta_tools/cli/cmds/remote_run_2.py +++ b/paasta_tools/cli/cmds/remote_run_2.py @@ -126,14 +126,13 @@ def paasta_remote_run( if not client: print("Cannot get a paasta-api client") exit(1) - + response = client.remote_run.remote_run_start( + service, + instance, + {"user": get_username(), "interactive": True}, + ) try: # TODO add image argument if build - response = client.service.remote_run( - service=service, - instance=instance, - user=get_username(), - ) print("Reponse was: ", response) response = json.loads(response) except client.api_error as exc: @@ -167,6 +166,14 @@ def paasta_remote_run( print("Run the following command to enter your service pod") print(exec_command) + print("Getting token") + try: + token = client.remote_run.remote_run_token( + service=service, instance=instance, user="qlo" + ) + print("Got token:", token) + except: + raise return ret_code diff --git a/paasta_tools/kubernetes_tools.py b/paasta_tools/kubernetes_tools.py index 90b97afc4e..b74be1f599 100644 --- a/paasta_tools/kubernetes_tools.py +++ b/paasta_tools/kubernetes_tools.py @@ -2047,7 +2047,9 @@ def format_as_kubernetes_job(self) -> V1Job: spec=V1JobSpec( active_deadline_seconds=3600, template=self.get_pod_template_spec( - git_sha=git_sha, system_paasta_config=system_paasta_config + git_sha=git_sha, + system_paasta_config=system_paasta_config, + restart=False, ), ), ) @@ -2182,7 +2184,10 @@ def has_routable_ip( return "false" def get_pod_template_spec( - self, git_sha: str, system_paasta_config: SystemPaastaConfig + self, + git_sha: str, + system_paasta_config: SystemPaastaConfig, + restart: bool = True, ) -> V1PodTemplateSpec: service_namespace_config = load_service_namespace_config( service=self.service, namespace=self.get_nerve_namespace() @@ -2221,7 +2226,7 @@ def get_pod_template_spec( ), share_process_namespace=True, node_selector=self.get_node_selector(), - restart_policy="Always", + restart_policy="Always" if restart else "Never", volumes=self.get_pod_volumes( docker_volumes=docker_volumes + hacheck_sidecar_volumes, aws_ebs_volumes=self.get_aws_ebs_volumes(), @@ -4450,7 +4455,7 @@ def get_kubernetes_secret_env_variables( def create_temp_exec_token(kube_client: KubeClient, namespace: str, user: str): """Create a short lived token for exec""" audience = "remote-run-" + user - service_account = "default" + service_account = "paasta-remote-run" token_spec = V1TokenRequestSpec( expiration_seconds=600, audiences=[audience] # minimum allowed by k8s ) diff --git a/paasta_tools/paasta_remote_run_2.py b/paasta_tools/paasta_remote_run_2.py index e2f49caf2a..4553ab0b49 100755 --- a/paasta_tools/paasta_remote_run_2.py +++ b/paasta_tools/paasta_remote_run_2.py @@ -12,14 +12,19 @@ from paasta_tools.utils import DEFAULT_SOA_DIR -def create_exec_token(namespace): +def create_exec_token(service, instance, user, cluster): """Creates a short lived token for execing into a pod""" kube_client = KubeClient(config_file="/etc/kubernetes/admin.conf") + # Load the service deployment settings + deployment = load_kubernetes_service_config_no_cache( + service, instance, cluster, DEFAULT_SOA_DIR + ) + namespace = deployment.get_namespace() try: token = create_temp_exec_token(kube_client, namespace, user) except ApiException as E: raise - return token["token"] + return token.status.token def remote_run_start(service, instance, user, cluster, interactive, recreate): From 732d369b3c944b47f6a4824fb262e3ee8105b073 Mon Sep 17 00:00:00 2001 From: Quentin Long Date: Fri, 8 Nov 2024 18:58:34 -0800 Subject: [PATCH 08/13] Minor change to token generation --- paasta_tools/cli/cmds/remote_run_2.py | 28 ++++++++++++++------------- paasta_tools/kubernetes_tools.py | 3 +-- paasta_tools/paasta_remote_run_2.py | 2 +- 3 files changed, 17 insertions(+), 16 deletions(-) diff --git a/paasta_tools/cli/cmds/remote_run_2.py b/paasta_tools/cli/cmds/remote_run_2.py index a2e2965909..8e6a1323e3 100644 --- a/paasta_tools/cli/cmds/remote_run_2.py +++ b/paasta_tools/cli/cmds/remote_run_2.py @@ -154,26 +154,28 @@ def paasta_remote_run( return ret_code pod_name, namespace = response["pod_name"], response["namespace"] - exec_command_tmpl = "kubectl{eks}-{cluster} exec -it -n {namespace} {pod} /bin/bash" - exec_command = exec_command_tmpl.format( - eks="-eks" if is_eks else "", cluster=cluster, namespace=namespace, pod=pod_name - ) print("Pod launched successfully:", pod_name) - # TODO figure out how to get this to work - # print('Attaching shell') - # cmd = subprocess.Popen(exec_command.split(' ')) - print("Run the following command to enter your service pod") - print(exec_command) - - print("Getting token") try: token = client.remote_run.remote_run_token( service=service, instance=instance, user="qlo" ) - print("Got token:", token) + token = json.loads(token)["token"] except: raise + + # TODO figure out how to get this to work + exec_command_tmpl = "kubectl{eks}-{cluster} --token {token} exec -it -n {namespace} {pod} -- /bin/bash" + exec_command = exec_command_tmpl.format( + eks="-eks" if is_eks else "", + cluster=cluster, + namespace=namespace, + pod=pod_name, + token=token, + ) + print("Running command", exec_command) + # cmd = subprocess.Popen(exec_command.split(' ')) + return ret_code @@ -183,5 +185,5 @@ def remote_run(args) -> int: "/nail/home/qlo/paasta_config/paasta/" ) return paasta_remote_run( - args.cluster, args.service, args.instance, system_paasta_config, 1, False + args.cluster, args.service, args.instance, system_paasta_config, 1, True ) diff --git a/paasta_tools/kubernetes_tools.py b/paasta_tools/kubernetes_tools.py index b74be1f599..5a8ff26777 100644 --- a/paasta_tools/kubernetes_tools.py +++ b/paasta_tools/kubernetes_tools.py @@ -4454,10 +4454,9 @@ def get_kubernetes_secret_env_variables( def create_temp_exec_token(kube_client: KubeClient, namespace: str, user: str): """Create a short lived token for exec""" - audience = "remote-run-" + user service_account = "paasta-remote-run" token_spec = V1TokenRequestSpec( - expiration_seconds=600, audiences=[audience] # minimum allowed by k8s + expiration_seconds=600, audiences=[] # minimum allowed by k8s ) request = AuthenticationV1TokenRequest(spec=token_spec) response = kube_client.core.create_namespaced_service_account_token( diff --git a/paasta_tools/paasta_remote_run_2.py b/paasta_tools/paasta_remote_run_2.py index 4553ab0b49..7f7d6fdaf2 100755 --- a/paasta_tools/paasta_remote_run_2.py +++ b/paasta_tools/paasta_remote_run_2.py @@ -32,7 +32,7 @@ def remote_run_start(service, instance, user, cluster, interactive, recreate): kube_client = KubeClient(config_file="/etc/kubernetes/admin.conf") # TODO hardcoded for now - is_eks = False + is_eks = True # Load the service deployment settings deployment = load_kubernetes_service_config_no_cache( From 926b76d7ac60feeefdb4448a7d387e2515602d98 Mon Sep 17 00:00:00 2001 From: Quentin Long Date: Fri, 8 Nov 2024 19:01:33 -0800 Subject: [PATCH 09/13] eks support --- paasta_tools/paasta_remote_run_2.py | 24 ++++++++++++++++++------ 1 file changed, 18 insertions(+), 6 deletions(-) diff --git a/paasta_tools/paasta_remote_run_2.py b/paasta_tools/paasta_remote_run_2.py index 7f7d6fdaf2..572851d802 100755 --- a/paasta_tools/paasta_remote_run_2.py +++ b/paasta_tools/paasta_remote_run_2.py @@ -3,6 +3,7 @@ from kubernetes.client.exceptions import ApiException +from paasta_tools.eks_tools import load_eks_service_config_no_cache from paasta_tools.kubernetes.application.controller_wrappers import ( get_application_wrapper, ) @@ -15,10 +16,16 @@ def create_exec_token(service, instance, user, cluster): """Creates a short lived token for execing into a pod""" kube_client = KubeClient(config_file="/etc/kubernetes/admin.conf") + is_eks = True # Load the service deployment settings - deployment = load_kubernetes_service_config_no_cache( - service, instance, cluster, DEFAULT_SOA_DIR - ) + if is_eks: + deployment = load_eks_service_config_no_cache( + service, instance, cluster, DEFAULT_SOA_DIR + ) + else: + deployment = load_kubernetes_service_config_no_cache( + service, instance, cluster, DEFAULT_SOA_DIR + ) namespace = deployment.get_namespace() try: token = create_temp_exec_token(kube_client, namespace, user) @@ -35,9 +42,14 @@ def remote_run_start(service, instance, user, cluster, interactive, recreate): is_eks = True # Load the service deployment settings - deployment = load_kubernetes_service_config_no_cache( - service, instance, cluster, DEFAULT_SOA_DIR - ) + if is_eks: + deployment = load_eks_service_config_no_cache( + service, instance, cluster, DEFAULT_SOA_DIR + ) + else: + deployment = load_kubernetes_service_config_no_cache( + service, instance, cluster, DEFAULT_SOA_DIR + ) namespace = deployment.get_namespace() # Set to interactive mode From a5cc802beb95885e115f1d6d4636a08d79bece5b Mon Sep 17 00:00:00 2001 From: Quentin Long Date: Mon, 11 Nov 2024 18:34:53 -0800 Subject: [PATCH 10/13] pty.spawn, stop --- paasta_tools/api/api_docs/swagger.json | 16 ++++- paasta_tools/api/views/instance.py | 13 +++- paasta_tools/cli/cmds/remote_run_2.py | 90 ++++++++++++++++++++------ paasta_tools/paasta_remote_run_2.py | 32 ++++++--- 4 files changed, 118 insertions(+), 33 deletions(-) diff --git a/paasta_tools/api/api_docs/swagger.json b/paasta_tools/api/api_docs/swagger.json index 0f5244bdc3..5fe2063367 100644 --- a/paasta_tools/api/api_docs/swagger.json +++ b/paasta_tools/api/api_docs/swagger.json @@ -947,11 +947,21 @@ "type": "string" }, { - "in": "query", + "in": "body", "description": "Username", - "name": "user", + "name": "json_body", "required": true, - "type": "string" + "schema": { + "type": "object", + "properties": { + "user": { + "type": "string" + } + }, + "required": [ + "user" + ] + } } ] } diff --git a/paasta_tools/api/views/instance.py b/paasta_tools/api/views/instance.py index c9c3b243b6..9be299ab88 100644 --- a/paasta_tools/api/views/instance.py +++ b/paasta_tools/api/views/instance.py @@ -391,7 +391,18 @@ def instance_mesh_status(request): @view_config(route_name="remote_run.stop", request_method="POST", renderer="json") def remote_run_stop(request): - pass + service = request.swagger_data.get("service") + instance = request.swagger_data.get("instance") + user = request.swagger_data["json_body"].get("user") + + try: + response = paasta_remote_run_2.remote_run_stop( + service, instance, user, settings.cluster + ) + except: + error_message = traceback.format_exc() + raise ApiFailure(error_message, 500) + return response @view_config(route_name="remote_run.token", request_method="GET", renderer="json") diff --git a/paasta_tools/cli/cmds/remote_run_2.py b/paasta_tools/cli/cmds/remote_run_2.py index 8e6a1323e3..67cac4aa20 100644 --- a/paasta_tools/cli/cmds/remote_run_2.py +++ b/paasta_tools/cli/cmds/remote_run_2.py @@ -14,7 +14,7 @@ # limitations under the License. import json import os -import subprocess +import pty import sys import time @@ -66,21 +66,27 @@ def add_subparser( help="Run stuff remotely.", description=("'paasta remote-run' runs stuff remotely "), ) - remote_run_parser.add_argument( + subparsers = remote_run_parser.add_subparsers(dest="remote_run_command") + start_parser = subparsers.add_parser( + "start", + help="Start or connect to a remote-run job", + description=("Starts or connects to a remote-run-job"), + ) + start_parser.add_argument( "-b", "--build", dest="build", help="Build the image from current directory", action="store_true", ) - remote_run_parser.add_argument( + start_parser.add_argument( "-y", "--yelpsoa-config-root", dest="yelpsoa_config_root", help="A directory from which yelpsoa-configs should be read from", default=DEFAULT_SOA_DIR, ) - remote_run_parser.add_argument( + start_parser.add_argument( "-I", "--interactive", help=( @@ -91,20 +97,27 @@ def add_subparser( required=False, default=False, ) - add_common_args_to_parser(remote_run_parser) + stop_parser = subparsers.add_parser( + "stop", + help="Stop! In the name of Paasta", + description="Stop your remote-run job if it exists", + ) + add_common_args_to_parser(start_parser) + add_common_args_to_parser(stop_parser) remote_run_parser.set_defaults(command=remote_run) -def paasta_remote_run( - cluster: str, - service: str, - instance: str, +def paasta_remote_run_start( + args, system_paasta_config: SystemPaastaConfig, - verbose: int, - is_eks: bool = False, - build: bool = False, + verbose: int = 0, + is_eks: bool = True, ) -> int: + cluster = args.cluster + service = args.service + instance = args.instance + build = args.build output = [] ret_code = 0 @@ -126,14 +139,14 @@ def paasta_remote_run( if not client: print("Cannot get a paasta-api client") exit(1) + + # TODO add image argument if build response = client.remote_run.remote_run_start( service, instance, {"user": get_username(), "interactive": True}, ) try: - # TODO add image argument if build - print("Reponse was: ", response) response = json.loads(response) except client.api_error as exc: print(exc, file=sys.stderr) @@ -153,8 +166,15 @@ def paasta_remote_run( print("\n".join(output)) return ret_code + if response["status"] == 409: + print( + "A remote-run container already exists. Run remote-run stop first if you'd like a new one." + ) + attach = input("Would you like to attach to it? y/n ") + if attach == "n": + return 0 + print(response) pod_name, namespace = response["pod_name"], response["namespace"] - print("Pod launched successfully:", pod_name) try: token = client.remote_run.remote_run_token( @@ -164,7 +184,6 @@ def paasta_remote_run( except: raise - # TODO figure out how to get this to work exec_command_tmpl = "kubectl{eks}-{cluster} --token {token} exec -it -n {namespace} {pod} -- /bin/bash" exec_command = exec_command_tmpl.format( eks="-eks" if is_eks else "", @@ -173,17 +192,46 @@ def paasta_remote_run( pod=pod_name, token=token, ) - print("Running command", exec_command) - # cmd = subprocess.Popen(exec_command.split(' ')) + cmd = pty.spawn(exec_command.split(" ")) return ret_code +def paasta_remote_run_stop( + args, + system_paasta_config: SystemPaastaConfig, + verbose: int = 0, + is_eks: bool = True, +) -> int: + + cluster = args.cluster + service = args.service + instance = args.instance + + ret_code = 0 + + client = get_paasta_oapi_client( + cluster=get_paasta_oapi_api_clustername(cluster=cluster, is_eks=is_eks), + system_paasta_config=system_paasta_config, + ) + if not client: + print("Cannot get a paasta-api client") + exit(1) + response = client.remote_run.remote_run_stop( + service, + instance, + {"user": get_username()}, + ) + print(response) + return ret_code + + def remote_run(args) -> int: """Run stuff, but remotely!""" system_paasta_config = load_system_paasta_config( "/nail/home/qlo/paasta_config/paasta/" ) - return paasta_remote_run( - args.cluster, args.service, args.instance, system_paasta_config, 1, True - ) + if args.remote_run_command == "start": + return paasta_remote_run_start(args, system_paasta_config) + elif args.remote_run_command == "stop": + return paasta_remote_run_stop(args, system_paasta_config) diff --git a/paasta_tools/paasta_remote_run_2.py b/paasta_tools/paasta_remote_run_2.py index 572851d802..6daf6c04b5 100755 --- a/paasta_tools/paasta_remote_run_2.py +++ b/paasta_tools/paasta_remote_run_2.py @@ -64,18 +64,19 @@ def remote_run_start(service, instance, user, cluster, interactive, recreate): app_wrapper.load_local_config(DEFAULT_SOA_DIR, cluster, is_eks) # Launch pod + status = 200 try: app_wrapper.create(kube_client) except ApiException as e: if e.status == 409: # Job already running - return json.dumps({"status": "409", "reason": "Job already running"}) + status = 409 raise pod = wait_until_pod_running(kube_client, namespace, job_name) return json.dumps( - {"status": "200", "pod_name": pod.metadata.name, "namespace": namespace} + {"status": status, "pod_name": pod.metadata.name, "namespace": namespace} ) @@ -117,9 +118,24 @@ def wait_until_pod_running(kube_client, namespace, job_name): return pod -# def remote_run_stop(): -# TODO Should this happen here or should the client kill the deployment directly? -# Load the service deployment settings -# deployment = load_kubernetes_service_config_no_cache( -# service, instance, cluster, DEFAULT_SOA_DIR -# ) +def remote_run_stop(service, instance, user, cluster): + # TODO Overriding the kube client config for now as the api has limited permissions + kube_client = KubeClient(config_file="/etc/kubernetes/admin.conf") + is_eks = True + if is_eks: + deployment = load_eks_service_config_no_cache( + service, instance, cluster, DEFAULT_SOA_DIR + ) + else: + deployment = load_kubernetes_service_config_no_cache( + service, instance, cluster, DEFAULT_SOA_DIR + ) + namespace = deployment.get_namespace() + formatted_job = deployment.format_as_kubernetes_job() + job_name = f"remote-run-{user}-{formatted_job.metadata.name}" + formatted_job.metadata.name = job_name + + app_wrapper = get_application_wrapper(formatted_job) + app_wrapper.load_local_config(DEFAULT_SOA_DIR, cluster, is_eks) + app_wrapper.deep_delete(kube_client) + return json.dumps({"status": 200, "message": "Job successfully removed"}) From 5707d6cbe25442250d3c86c7b5c1eb947ed21406 Mon Sep 17 00:00:00 2001 From: Quentin Long Date: Mon, 11 Nov 2024 18:41:34 -0800 Subject: [PATCH 11/13] is_eks --- paasta_tools/api/views/instance.py | 24 +++++++++++++++++++++--- paasta_tools/paasta_remote_run_2.py | 12 ++++-------- 2 files changed, 25 insertions(+), 11 deletions(-) diff --git a/paasta_tools/api/views/instance.py b/paasta_tools/api/views/instance.py index 9be299ab88..c2a87ce794 100644 --- a/paasta_tools/api/views/instance.py +++ b/paasta_tools/api/views/instance.py @@ -394,10 +394,11 @@ def remote_run_stop(request): service = request.swagger_data.get("service") instance = request.swagger_data.get("instance") user = request.swagger_data["json_body"].get("user") + is_eks = is_instance_eks(service, instance) try: response = paasta_remote_run_2.remote_run_stop( - service, instance, user, settings.cluster + service, instance, user, settings.cluster, is_eks ) except: error_message = traceback.format_exc() @@ -410,9 +411,10 @@ def remote_run_token(request): service = request.swagger_data.get("service") instance = request.swagger_data.get("instance") user = request.swagger_data.get("user") + is_eks = is_instance_eks(service, instance) try: token = paasta_remote_run_2.create_exec_token( - service, instance, user, settings.cluster + service, instance, user, settings.cluster, is_eks ) except: error_message = traceback.format_exc() @@ -420,6 +422,21 @@ def remote_run_token(request): return json.dumps({"token": token}) +def is_instance_eks(service, instance): + try: + instance_type = validate_service_instance( + service, instance, settings.cluster, settings.soa_dir + ) + except NoConfigurationForServiceError: + error_message = no_configuration_for_service_message( + settings.cluster, + service, + instance, + ) + raise Exception(error_message, 404) + return instance_type == "eks" + + @view_config(route_name="remote_run.start", request_method="POST", renderer="json") def remote_run_start(request): service = request.swagger_data.get("service") @@ -428,9 +445,10 @@ def remote_run_start(request): interactive = request.swagger_data["json_body"].get("interactive", True) recreate = request.swagger_data["json_body"].get("recreate", True) + is_eks = is_instance_eks(service, instance) try: response = paasta_remote_run_2.remote_run_start( - service, instance, user, settings.cluster, interactive, recreate + service, instance, user, settings.cluster, interactive, recreate, is_eks ) except Exception: error_message = traceback.format_exc() diff --git a/paasta_tools/paasta_remote_run_2.py b/paasta_tools/paasta_remote_run_2.py index 6daf6c04b5..273e760a86 100755 --- a/paasta_tools/paasta_remote_run_2.py +++ b/paasta_tools/paasta_remote_run_2.py @@ -13,10 +13,10 @@ from paasta_tools.utils import DEFAULT_SOA_DIR -def create_exec_token(service, instance, user, cluster): +def create_exec_token(service, instance, user, cluster, is_eks): """Creates a short lived token for execing into a pod""" kube_client = KubeClient(config_file="/etc/kubernetes/admin.conf") - is_eks = True + # Load the service deployment settings if is_eks: deployment = load_eks_service_config_no_cache( @@ -34,13 +34,10 @@ def create_exec_token(service, instance, user, cluster): return token.status.token -def remote_run_start(service, instance, user, cluster, interactive, recreate): +def remote_run_start(service, instance, user, cluster, interactive, recreate, is_eks): # TODO Overriding the kube client config for now as the api has limited permissions kube_client = KubeClient(config_file="/etc/kubernetes/admin.conf") - # TODO hardcoded for now - is_eks = True - # Load the service deployment settings if is_eks: deployment = load_eks_service_config_no_cache( @@ -118,10 +115,9 @@ def wait_until_pod_running(kube_client, namespace, job_name): return pod -def remote_run_stop(service, instance, user, cluster): +def remote_run_stop(service, instance, user, cluster, is_eks): # TODO Overriding the kube client config for now as the api has limited permissions kube_client = KubeClient(config_file="/etc/kubernetes/admin.conf") - is_eks = True if is_eks: deployment = load_eks_service_config_no_cache( service, instance, cluster, DEFAULT_SOA_DIR From 9a302a28acfffbda1e40f41711af7070e8d84549 Mon Sep 17 00:00:00 2001 From: Quentin Long Date: Thu, 14 Nov 2024 18:33:41 -0800 Subject: [PATCH 12/13] Use pod specific roles and service accounts --- paasta_tools/kubernetes_tools.py | 111 +++++++++++++++++----------- paasta_tools/paasta_remote_run_2.py | 12 ++- 2 files changed, 80 insertions(+), 43 deletions(-) diff --git a/paasta_tools/kubernetes_tools.py b/paasta_tools/kubernetes_tools.py index 5a8ff26777..a557ecd686 100644 --- a/paasta_tools/kubernetes_tools.py +++ b/paasta_tools/kubernetes_tools.py @@ -19,6 +19,7 @@ import math import os import re +import time from datetime import datetime from datetime import timezone from enum import Enum @@ -98,11 +99,13 @@ from kubernetes.client import V1PodSecurityContext from kubernetes.client import V1PodSpec from kubernetes.client import V1PodTemplateSpec +from kubernetes.client import V1PolicyRule from kubernetes.client import V1PreferredSchedulingTerm from kubernetes.client import V1Probe from kubernetes.client import V1ProjectedVolumeSource from kubernetes.client import V1ReplicaSet from kubernetes.client import V1ResourceRequirements +from kubernetes.client import V1Role from kubernetes.client import V1RoleBinding from kubernetes.client import V1RoleRef from kubernetes.client import V1RollingUpdateDeployment @@ -2834,7 +2837,6 @@ def ensure_namespace(kube_client: KubeClient, namespace: str) -> None: else: raise - ensure_paasta_remote_run_service_account(kube_client, namespace) ensure_paasta_api_rolebinding(kube_client, namespace) ensure_paasta_namespace_limits(kube_client, namespace) @@ -2868,45 +2870,6 @@ def ensure_paasta_api_rolebinding(kube_client: KubeClient, namespace: str) -> No ) -def ensure_paasta_remote_run_service_account( - kube_client: KubeClient, namespace: str -) -> None: - """Checks whether a ServiceAccount named paasta-remote-run exists in the namespace, - and if it doesn't, create it and bind paasta-remote-run-role to it.""" - service_accounts = get_all_service_accounts(kube_client, namespace=namespace) - service_account_names = [item.metadata.name for item in service_accounts] - if "paasta-remote-run" not in service_account_names: - log.warning( - f"Creating service account paasta-remote-run in {namespace} namespace as it does not exist" - ) - service_account = V1ServiceAccount( - metadata=V1ObjectMeta(name="paasta-remote-run", namespace=namespace) - ) - role_binding = V1RoleBinding( - metadata=V1ObjectMeta( - name="paasta-remote-run-role", - namespace=namespace, - ), - role_ref=V1RoleRef( - api_group="rbac.authorization.k8s.io", - kind="ClusterRole", - name="paasta-remote-run-role", - ), - subjects=[ - V1Subject( - kind="ServiceAccount", - name="paasta-remote-run", - ), - ], - ) - kube_client.core.create_namespaced_service_account( - namespace=namespace, body=service_account - ) - kube_client.rbac.create_namespaced_role_binding( - namespace=namespace, body=role_binding - ) - - def ensure_paasta_namespace_limits(kube_client: KubeClient, namespace: str) -> None: if not namespace.startswith("paastasvc-"): log.debug( @@ -4452,9 +4415,10 @@ def get_kubernetes_secret_env_variables( return decrypted_secrets -def create_temp_exec_token(kube_client: KubeClient, namespace: str, user: str): +def create_temp_exec_token( + kube_client: KubeClient, namespace: str, service_account: str +): """Create a short lived token for exec""" - service_account = "paasta-remote-run" token_spec = V1TokenRequestSpec( expiration_seconds=600, audiences=[] # minimum allowed by k8s ) @@ -4560,3 +4524,66 @@ def add_volumes_for_authenticating_services( ): config_volumes = [token_config, *config_volumes] return config_volumes + + +def create_pod_scoped_role(kube_client, namespace, user, pod_name): + pod_name_hash = hashlib.sha1(pod_name.encode("utf-8")).hexdigest()[:12] + policy = V1PolicyRule( + verbs=["create", "get"], + resources=["pods", "pods/exec"], + resource_names=[pod_name], + api_groups=[""], + ) + role_name = f"remote-run-role-{pod_name_hash}" + role = V1Role( + rules=[policy], + metadata=V1ObjectMeta( + name=role_name, + labels={"CreationTime": str(int(time.time())), "PodOwner": user}, + ), + ) + + kube_client.core.create_namespaced_role(namespace=namespace, body=role) + return role_name + + +def bind_role_to_service_account(kube_client, namespace, service_account, role): + role_binding = V1RoleBinding( + metadata=V1ObjectMeta( + name=f"binding-{role}", + namespace=namespace, + ), + role_ref=V1RoleRef( + api_group="rbac.authorization.k8s.io", + kind="Role", + name=role, + ), + subjects=[ + V1Subject( + kind="ServiceAccount", + name=service_account, + ), + ], + ) + kube_client.rbac.create_namespaced_role_binding( + namespace=namespace, body=role_binding + ) + + +def create_paasta_remote_run_service_account( + kube_client: KubeClient, namespace: str, username: str, pod_name: str +) -> None: + pod_name_hash = hashlib.sha1(pod_name.encode("utf-8")).hexdigest()[:12] + service_account_name = f"remote-run-{username}-{pod_name_hash}" + service_accounts = get_all_service_accounts(kube_client, namespace=namespace) + service_account_names = [item.metadata.name for item in service_accounts] + if service_account_name in service_account_names: + return service_account_name + + service_account = V1ServiceAccount( + metadata=V1ObjectMeta(name=service_account_name, namespace=namespace) + ) + kube_client.core.create_namespaced_service_account( + namespace=namespace, body=service_account + ) + return service_account_name diff --git a/paasta_tools/paasta_remote_run_2.py b/paasta_tools/paasta_remote_run_2.py index 273e760a86..75cedb2d63 100755 --- a/paasta_tools/paasta_remote_run_2.py +++ b/paasta_tools/paasta_remote_run_2.py @@ -27,8 +27,18 @@ def create_exec_token(service, instance, user, cluster, is_eks): service, instance, cluster, DEFAULT_SOA_DIR ) namespace = deployment.get_namespace() + formatted_job = deployment.format_as_kubernetes_job() + job_name = f"remote-run-{user}-{formatted_job.metadata.name}" + try: - token = create_temp_exec_token(kube_client, namespace, user) + pod = find_pod(kube_client, namespace, job_name) + pod_name = pod.metadata.name + service_account = create_paasta_remote_run_service_account( + kubeclient, namespace, user, pod_name + ) + role = create_pod_scoped_role(kube_client, namespace, pod_name, user) + bind_role_service_account(kube_client, namespace, service_account, role) + token = create_temp_exec_token(kube_client, namespace, service_account) except ApiException as E: raise return token.status.token From 19f90ab4da5817abd12095d319d09d71512df89d Mon Sep 17 00:00:00 2001 From: Quentin Long Date: Mon, 18 Nov 2024 18:04:24 -0800 Subject: [PATCH 13/13] Removal of remote-run service accounts and roles --- paasta_tools/kubernetes_tools.py | 50 ++++++++++++++++++++++++++++++-- 1 file changed, 47 insertions(+), 3 deletions(-) diff --git a/paasta_tools/kubernetes_tools.py b/paasta_tools/kubernetes_tools.py index a557ecd686..cef7b239b0 100644 --- a/paasta_tools/kubernetes_tools.py +++ b/paasta_tools/kubernetes_tools.py @@ -19,8 +19,8 @@ import math import os import re -import time from datetime import datetime +from datetime import timedelta from datetime import timezone from enum import Enum from functools import lru_cache @@ -2839,6 +2839,7 @@ def ensure_namespace(kube_client: KubeClient, namespace: str) -> None: ensure_paasta_api_rolebinding(kube_client, namespace) ensure_paasta_namespace_limits(kube_client, namespace) + remove_remote_run_accounts_and_roles(kube_client, namespace) def ensure_paasta_api_rolebinding(kube_client: KubeClient, namespace: str) -> None: @@ -4539,7 +4540,7 @@ def create_pod_scoped_role(kube_client, namespace, user, pod_name): rules=[policy], metadata=V1ObjectMeta( name=role_name, - labels={"CreationTime": str(int(time.time())), "PodOwner": user}, + labels={"PodOwner": user}, ), ) @@ -4581,9 +4582,52 @@ def create_paasta_remote_run_service_account( return service_account_name service_account = V1ServiceAccount( - metadata=V1ObjectMeta(name=service_account_name, namespace=namespace) + metadata=V1ObjectMeta( + name=service_account_name, + namespace=namespace, + labels={"PodOwner": username}, + ) ) kube_client.core.create_namespaced_service_account( namespace=namespace, body=service_account ) return service_account_name + + +def get_namespaced_roles(kube_client, namespace): + return kube_client.rbac.list_namespaced_role(namespace).items + + +def remove_remote_run_accounts_and_roles( + kube_client: KubeClient, + namespace: str, +) -> None: + service_accounts = get_all_service_accounts(kube_client, namespace) + roles = get_namespaced_roles(kube_client, namespace) + + current_time = datetime.utcnow().replace(tzinfo=timezone.utc) + age_limit = timedelta(seconds=60) + + for delete_func, entity_list in ( + (delete_namespaced_service_account, service_accounts), + (delete_namespaced_role, roles), + ): + for entity in entity_list: + if not entity.metadata.name.startswith("remote-run-"): + continue + entity_age = current_time - entity.metadata.creation_timestamp + if entity_age < age_limit: + continue + delete_func(kube_client, namespace, entity) + + +def delete_namespaced_service_account( + kube_client: KubeClient, namespace: str, service_account: V1ServiceAccount +): + kube_client.core.delete_namespaced_service_account( + service_account.metadata.name, namespace + ) + + +def delete_namespaced_role(kube_client: KubeClient, namespace: str, role: V1Role): + kube_client.rbac.delete_namespaced_role(role.metadata.name, namespace)