From 97b973be800f32cf226e76223956e0fa9130d627 Mon Sep 17 00:00:00 2001
From: Quentin Long <qlo@yelp.com>
Date: Wed, 30 Oct 2024 17:13:19 -0700
Subject: [PATCH 01/13] Checkpoint commit

---
 paasta_tools/api/api.py                   |   4 +
 paasta_tools/api/api_docs/oapi.yaml       |  36 ++++++
 paasta_tools/api/api_docs/swagger.json    |  46 ++++++++
 paasta_tools/api/views/instance.py        |  16 +++
 paasta_tools/cli/cli.py                   |   1 +
 paasta_tools/cli/cmds/remote_run_2.py     | 109 +++++++++++++++++
 paasta_tools/paasta_remote_run_2.py       |  51 ++++++++
 paasta_tools/paastaapi/api/service_api.py | 136 ++++++++++++++++++++++
 8 files changed, 399 insertions(+)
 create mode 100644 paasta_tools/cli/cmds/remote_run_2.py
 create mode 100755 paasta_tools/paasta_remote_run_2.py

diff --git a/paasta_tools/api/api.py b/paasta_tools/api/api.py
index f01a48dac1..a0f20d89c8 100644
--- a/paasta_tools/api/api.py
+++ b/paasta_tools/api/api.py
@@ -148,6 +148,10 @@ def make_app(global_config=None):
         "service.instance.tasks.task",
         "/v1/services/{service}/{instance}/tasks/{task_id}",
     )
+    config.add_route(
+        "service.instance.remote_run",
+        "/v1/services/{service}/{instance}/remote_run",
+    )
     config.add_route("service.list", "/v1/services/{service}")
     config.add_route("services", "/v1/services")
     config.add_route(
diff --git a/paasta_tools/api/api_docs/oapi.yaml b/paasta_tools/api/api_docs/oapi.yaml
index 1518b6b65d..a5b4310684 100644
--- a/paasta_tools/api/api_docs/oapi.yaml
+++ b/paasta_tools/api/api_docs/oapi.yaml
@@ -1627,6 +1627,42 @@ paths:
       summary: Get mesos task of service_name.instance_name by task_id
       tags:
       - service
+  /services/{service}/{instance}/remote_run:
+    post:
+      operationId: remote_run
+      parameters:
+      - description: Service name
+        in: path
+        name: service
+        required: true
+        schema:
+          type: string
+      - description: Instance name
+        in: path
+        name: instance
+        required: true
+        schema:
+          type: string
+      - description: Username
+        in: query
+        name: user
+        required: true
+        schema:
+          type: string
+      responses:
+        "200":
+          content:
+            application/json:
+              schema:
+                type: string
+          description: The service is delayed for these possible reasons
+        "404":
+          description: Deployment key not found
+        "500":
+          description: Failure
+      summary: Launch a remote-run pod
+      tags:
+      - service
   /version:
     get:
       operationId: showVersion
diff --git a/paasta_tools/api/api_docs/swagger.json b/paasta_tools/api/api_docs/swagger.json
index 9caa3c8cac..7ee0a58740 100644
--- a/paasta_tools/api/api_docs/swagger.json
+++ b/paasta_tools/api/api_docs/swagger.json
@@ -846,6 +846,52 @@
                     }
                 ]
             }
+        },
+        "/services/{service}/{instance}/remote_run": {
+            "post": {
+                "responses": {
+                    "200": {
+                        "description": "It worked!",
+                        "schema": {
+                            "type": "string"
+                        }
+                    },
+                    "404": {
+                        "description": "Deployment key not found"
+                    },
+                    "500": {
+                        "description": "Failure"
+                    }
+                },
+                "summary": "Do a remote run",
+                "operationId": "remote_run",
+                "tags": [
+                    "service"
+                ],
+                "parameters": [
+                    {
+                        "in": "path",
+                        "description": "Service name",
+                        "name": "service",
+                        "required": true,
+                        "type": "string"
+                    },
+                    {
+                        "in": "path",
+                        "description": "Instance name",
+                        "name": "instance",
+                        "required": true,
+                        "type": "string"
+                    },
+                    {
+                        "in": "query",
+                        "description": "Username",
+                        "name": "user",
+                        "required": true,
+                        "type": "string"
+                    }
+                ]
+            }
         }
     },
     "definitions": {
diff --git a/paasta_tools/api/views/instance.py b/paasta_tools/api/views/instance.py
index 945a1d5187..274fe782af 100644
--- a/paasta_tools/api/views/instance.py
+++ b/paasta_tools/api/views/instance.py
@@ -31,6 +31,7 @@
 
 import paasta_tools.mesos.exceptions as mesos_exceptions
 from paasta_tools import paasta_remote_run
+from paasta_tools import paasta_remote_run_2
 from paasta_tools import tron_tools
 from paasta_tools.api import settings
 from paasta_tools.api.views.exception import ApiFailure
@@ -385,3 +386,18 @@ def instance_mesh_status(request):
         raise ApiFailure(error_message, 500)
 
     return instance_mesh
+
+
+@view_config(
+    route_name="service.instance.remote_run", request_method="POST", renderer="json"
+)
+def remote_run(request):
+    service = request.swagger_data.get("service")
+    instance = request.swagger_data.get("instance")
+    user = request.swagger_data.get("user")
+
+    try:
+        paasta_remote_run_2.remote_run_start(service, instance, user, settings.cluster)
+    except Exception:
+        error_message = traceback.format_exc()
+        raise ApiFailure(error_message, 500)
diff --git a/paasta_tools/cli/cli.py b/paasta_tools/cli/cli.py
index 92f9dd222b..22af8a138d 100755
--- a/paasta_tools/cli/cli.py
+++ b/paasta_tools/cli/cli.py
@@ -118,6 +118,7 @@ def add_subparser(command, subparsers):
     "pause_service_autoscaler": "pause_service_autoscaler",
     "push-to-registry": "push_to_registry",
     "remote-run": "remote_run",
+    "remote-run-2": "remote_run_2",
     "rollback": "rollback",
     "secret": "secret",
     "security-check": "security_check",
diff --git a/paasta_tools/cli/cmds/remote_run_2.py b/paasta_tools/cli/cmds/remote_run_2.py
new file mode 100644
index 0000000000..dd380ccaf1
--- /dev/null
+++ b/paasta_tools/cli/cmds/remote_run_2.py
@@ -0,0 +1,109 @@
+#!/usr/bin/env python
+# Copyright 2015-2016 Yelp Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from paasta_tools.api.client import get_paasta_oapi_client
+from paasta_tools.cli.utils import get_paasta_oapi_api_clustername
+from paasta_tools.cli.utils import lazy_choices_completer
+from paasta_tools.utils import list_services
+from paasta_tools.utils import load_system_paasta_config
+from paasta_tools.utils import PaastaColors
+from paasta_tools.utils import SystemPaastaConfig
+
+
+def add_common_args_to_parser(parser):
+    parser.add_argument(
+        "-s",
+        "--service",
+        help="The name of the service you wish to inspect. Required.",
+        required=True,
+    ).completer = lazy_choices_completer(list_services)
+    parser.add_argument(
+        "-i",
+        "--instance",
+        help=(
+            "Simulate a docker run for a particular instance of the "
+            "service, like 'main' or 'canary'. Required."
+        ),
+        required=True,
+    )
+    parser.add_argument(
+        "-c",
+        "--cluster",
+        help=(
+            "The name of the cluster you wish to run your task on. "
+            "If omitted, uses the default cluster defined in the paasta "
+            f"remote-run configs."
+        ),
+    )
+
+
+def add_subparser(
+    subparsers,
+) -> None:
+    remote_run_parser = subparsers.add_parser(
+        "remote-run-2",
+        help="Run stuff remotely.",
+        description=("'paasta remote-run' runs stuff remotely "),
+    )
+    add_common_args_to_parser(remote_run_parser)
+    remote_run_parser.set_defaults(command=remote_run)
+
+
+def paasta_remote_run(
+    cluster: str,
+    service: str,
+    instance: str,
+    system_paasta_config: SystemPaastaConfig,
+    verbose: int,
+    is_eks: bool = False,
+) -> int:
+    output = []
+    ret_code = 0
+    client = get_paasta_oapi_client(
+        cluster=get_paasta_oapi_api_clustername(cluster=cluster, is_eks=is_eks),
+        system_paasta_config=system_paasta_config,
+    )
+    if not client:
+        print("Cannot get a paasta-api client")
+        exit(1)
+
+    try:
+        response = client.service.remote_run(
+            service=service, instance=instance, user="qlo"
+        )
+        print(response)
+    except client.api_error as exc:
+        output.append(PaastaColors.red(exc.reason))
+        ret_code = exc.status
+    except (client.connection_error, client.timeout_error) as exc:
+        output.append(
+            PaastaColors.red(f"Could not connect to API: {exc.__class__.__name__}")
+        )
+        ret_code = 1
+    except Exception as e:
+        output.append(PaastaColors.red(f"Exception when talking to the API:"))
+        output.append(str(e))
+        ret_code = 1
+
+    print("\n".join(output))
+
+    return ret_code
+
+
+def remote_run(args) -> int:
+    """Run stuff, but remotely!"""
+    system_paasta_config = load_system_paasta_config()
+    paasta_remote_run(
+        args.cluster, args.service, args.instance, system_paasta_config, 1, False
+    )
diff --git a/paasta_tools/paasta_remote_run_2.py b/paasta_tools/paasta_remote_run_2.py
new file mode 100755
index 0000000000..ed60a73aa7
--- /dev/null
+++ b/paasta_tools/paasta_remote_run_2.py
@@ -0,0 +1,51 @@
+from time import sleep
+
+from paasta_tools.kubernetes.application.controller_wrappers import (
+    get_application_wrapper,
+)
+from paasta_tools.kubernetes_tools import KubeClient
+from paasta_tools.kubernetes_tools import load_kubernetes_service_config_no_cache
+from paasta_tools.utils import DEFAULT_SOA_DIR
+
+
+def remote_run_start(service, instance, user, cluster):
+    kube_client = KubeClient()
+    is_eks = False
+    deployment = load_kubernetes_service_config_no_cache(
+        service, instance, cluster, DEFAULT_SOA_DIR
+    )
+    namespace = deployment.get_namespace()
+
+    formatted_application = deployment.format_kubernetes_app()
+    formatted_application.metadata.name += f"-remote-run-{user}"
+    pod_name = formatted_application.metadata.name
+    app_wrapper = get_application_wrapper(formatted_application)
+    app_wrapper.load_local_config(DEFAULT_SOA_DIR, cluster, is_eks)
+    app_wrapper.create(kube_client)
+
+    # Get pod status and name
+    for retry in range(5):
+        pod_list = kube_client.core.list_namespaced_pod(namespace)
+        matching_pod = None
+        for pod in pod_list.items:
+            if pod.metadata.name.startswith(pod_name):
+                matching_pod = pod
+                break
+
+        if not matching_pod:
+            sleep(1)
+            continue
+
+        if pod.status.phase == "Running":
+            break
+        elif pod.status.phase != "Initializing":
+            raise Exception(f"Pod state is {pod.status.phase}")
+
+    if not matching_pod:
+        raise Exception("No matching pod")
+
+    return {"Status": "Success!", "pod_name": pod.metadata.name, "namespace": namespace}
+
+
+def remote_run_stop():
+    pass
diff --git a/paasta_tools/paastaapi/api/service_api.py b/paasta_tools/paastaapi/api/service_api.py
index 63901f7cee..7db7aec9fe 100644
--- a/paasta_tools/paastaapi/api/service_api.py
+++ b/paasta_tools/paastaapi/api/service_api.py
@@ -1299,6 +1299,142 @@ def __mesh_instance(
             callable=__mesh_instance
         )
 
+        def __remote_run(
+            self,
+            service,
+            instance,
+            user,
+            **kwargs
+        ):
+            """Launch a remote-run pod  # noqa: E501
+
+            This method makes a synchronous HTTP request by default. To make an
+            asynchronous HTTP request, please pass async_req=True
+
+            >>> thread = api.remote_run(service, instance, user, async_req=True)
+            >>> result = thread.get()
+
+            Args:
+                service (str): Service name
+                instance (str): Instance name
+                user (str): Username
+
+            Keyword Args:
+                _return_http_data_only (bool): response data without head status
+                    code and headers. Default is True.
+                _preload_content (bool): if False, the urllib3.HTTPResponse object
+                    will be returned without reading/decoding response data.
+                    Default is True.
+                _request_timeout (float/tuple): timeout setting for this request. If one
+                    number provided, it will be total request timeout. It can also
+                    be a pair (tuple) of (connection, read) timeouts.
+                    Default is None.
+                _check_input_type (bool): specifies if type checking
+                    should be done one the data sent to the server.
+                    Default is True.
+                _check_return_type (bool): specifies if type checking
+                    should be done one the data received from the server.
+                    Default is True.
+                _host_index (int/None): specifies the index of the server
+                    that we want to use.
+                    Default is read from the configuration.
+                async_req (bool): execute request asynchronously
+
+            Returns:
+                str
+                    If the method is called asynchronously, returns the request
+                    thread.
+            """
+            kwargs['async_req'] = kwargs.get(
+                'async_req', False
+            )
+            kwargs['_return_http_data_only'] = kwargs.get(
+                '_return_http_data_only', True
+            )
+            kwargs['_preload_content'] = kwargs.get(
+                '_preload_content', True
+            )
+            kwargs['_request_timeout'] = kwargs.get(
+                '_request_timeout', None
+            )
+            kwargs['_check_input_type'] = kwargs.get(
+                '_check_input_type', True
+            )
+            kwargs['_check_return_type'] = kwargs.get(
+                '_check_return_type', True
+            )
+            kwargs['_host_index'] = kwargs.get('_host_index')
+            kwargs['service'] = \
+                service
+            kwargs['instance'] = \
+                instance
+            kwargs['user'] = \
+                user
+            return self.call_with_http_info(**kwargs)
+
+        self.remote_run = Endpoint(
+            settings={
+                'response_type': (str,),
+                'auth': [],
+                'endpoint_path': '/services/{service}/{instance}/remote_run',
+                'operation_id': 'remote_run',
+                'http_method': 'POST',
+                'servers': None,
+            },
+            params_map={
+                'all': [
+                    'service',
+                    'instance',
+                    'user',
+                ],
+                'required': [
+                    'service',
+                    'instance',
+                    'user',
+                ],
+                'nullable': [
+                ],
+                'enum': [
+                ],
+                'validation': [
+                ]
+            },
+            root_map={
+                'validations': {
+                },
+                'allowed_values': {
+                },
+                'openapi_types': {
+                    'service':
+                        (str,),
+                    'instance':
+                        (str,),
+                    'user':
+                        (str,),
+                },
+                'attribute_map': {
+                    'service': 'service',
+                    'instance': 'instance',
+                    'user': 'user',
+                },
+                'location_map': {
+                    'service': 'path',
+                    'instance': 'path',
+                    'user': 'query',
+                },
+                'collection_format_map': {
+                }
+            },
+            headers_map={
+                'accept': [
+                    'application/json'
+                ],
+                'content_type': [],
+            },
+            api_client=api_client,
+            callable=__remote_run
+        )
+
         def __status_instance(
             self,
             service,

From 3e049e10bf64bff087da0ce816d8de8653ad4b40 Mon Sep 17 00:00:00 2001
From: Matteo Piano <mpiano@yelp.com>
Date: Fri, 1 Nov 2024 07:53:21 -0700
Subject: [PATCH 02/13] auth support for paasta APIs

---
 paasta_tools/api/api.py             |  19 +++++
 paasta_tools/api/tweens/__init__.py |   6 ++
 paasta_tools/api/tweens/auth.py     | 113 ++++++++++++++++++++++++++++
 requirements.txt                    |   2 +-
 tests/api/tweens/test_auth.py       |  95 +++++++++++++++++++++++
 5 files changed, 234 insertions(+), 1 deletion(-)
 create mode 100644 paasta_tools/api/tweens/auth.py
 create mode 100644 tests/api/tweens/test_auth.py

diff --git a/paasta_tools/api/api.py b/paasta_tools/api/api.py
index f01a48dac1..023a4f8e62 100644
--- a/paasta_tools/api/api.py
+++ b/paasta_tools/api/api.py
@@ -31,6 +31,7 @@
 import paasta_tools.api
 from paasta_tools import kubernetes_tools
 from paasta_tools.api import settings
+from paasta_tools.api.tweens import auth
 from paasta_tools.api.tweens import profiling
 from paasta_tools.api.tweens import request_logger
 from paasta_tools.utils import load_system_paasta_config
@@ -79,6 +80,18 @@ def parse_paasta_api_args():
         default=4,
         help="Number of gunicorn workers to run",
     )
+    parser.add_argument(
+        "--auth-endpoint",
+        type=str,
+        default="",
+        help="External API authorization endpoint",
+    )
+    parser.add_argument(
+        "--auth-enforce",
+        action="store_true",
+        default=False,
+        help="Enforce API authorization",
+    )
     args = parser.parse_args()
     return args
 
@@ -105,6 +118,7 @@ def make_app(global_config=None):
 
     config.include("pyramid_swagger")
     config.include(request_logger)
+    config.include(auth)
 
     config.add_route(
         "flink.service.instance.jobs", "/v1/flink/{service}/{instance}/jobs"
@@ -257,6 +271,11 @@ def main(argv=None):
     if args.cluster:
         os.environ["PAASTA_API_CLUSTER"] = args.cluster
 
+    if args.auth_endpoint:
+        os.environ["PAASTA_API_AUTH_ENDPOINT"] = args.auth_endpoint
+        if args.auth_enforce:
+            os.environ["PAASTA_API_AUTH_ENFORCE"] = "1"
+
     gunicorn_args = [
         "gunicorn",
         "-w",
diff --git a/paasta_tools/api/tweens/__init__.py b/paasta_tools/api/tweens/__init__.py
index e69de29bb2..e8d08dd58d 100644
--- a/paasta_tools/api/tweens/__init__.py
+++ b/paasta_tools/api/tweens/__init__.py
@@ -0,0 +1,6 @@
+from typing import Callable
+
+from pyramid.request import Request
+from pyramid.response import Response
+
+Handler = Callable[[Request], Response]
diff --git a/paasta_tools/api/tweens/auth.py b/paasta_tools/api/tweens/auth.py
new file mode 100644
index 0000000000..2e2ff7ef3d
--- /dev/null
+++ b/paasta_tools/api/tweens/auth.py
@@ -0,0 +1,113 @@
+#!/usr/bin/env python
+# Copyright 2015-2016 Yelp Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import json
+import logging
+import os
+from typing import NamedTuple
+
+import cachetools.func
+import pyramid
+import requests
+from pyramid.config import Configurator
+from pyramid.httpexceptions import HTTPForbidden
+from pyramid.registry import Registry
+from pyramid.request import Request
+from pyramid.response import Response
+
+from paasta_tools.api.tweens import Handler
+
+
+logger = logging.getLogger(__name__)
+AUTH_CACHE_SIZE = 50000
+AUTH_CACHE_TTL = 30 * 60
+
+
+class AuthorizationOutcome(NamedTuple):
+    authorized: bool
+    reason: str
+
+
+class AuthTweenFactory:
+    def __init__(self, handler: Handler, registry: Registry) -> None:
+        self.handler = handler
+        self.registry = registry
+        self.enforce = bool(os.getenv("PAASTA_API_AUTH_ENFORCE", ""))
+        self.endpoint = os.getenv("PAASTA_API_AUTH_ENDPOINT")
+        self.session = requests.Session()
+
+    def __call__(self, request: Request) -> Response:
+        """
+        Extracts relevant metadata from request, and checks if it is authorized
+        """
+        token = request.headers.get("Authorization", "").strip()
+        token = token.split()[-1] if token else ""  # removes "Bearer" prefix
+        auth_outcome = self.is_request_authorized(request.path, token, request.method)
+        if self.enforce and not auth_outcome.authorized:
+            return HTTPForbidden(
+                body=json.dumps({"reason": auth_outcome.reason}),
+                headers={"X-Auth-Failure-Reason": auth_outcome.reason},
+                content_type="application/json",
+                charset="utf-8",
+            )
+        return self.handler(request)
+
+    @cachetools.func.ttl_cache(maxsize=AUTH_CACHE_SIZE, ttl=AUTH_CACHE_TTL)
+    def is_request_authorized(
+        self, path: str, token: str, method: str
+    ) -> AuthorizationOutcome:
+        """Check if API request is authorized
+
+        :param str path: API path
+        :param str token: authentication token
+        :param str method: http method
+        :return: auth outcome
+        """
+        try:
+            response = self.session.post(
+                url=self.endpoint,
+                json={
+                    "input": {
+                        "path": path,
+                        "backend": "paasta",
+                        "token": token,
+                        "method": method,
+                    },
+                },
+                timeout=2,
+            ).json()
+        except Exception as e:
+            logger.exception(f"Issue communicating with auth endpoint: {e}")
+            return AuthorizationOutcome(False, "Auth backend error")
+
+        if "result" not in response or "allowed" not in response["result"]:
+            return AuthorizationOutcome(False, "Malformed auth response")
+
+        if not response["result"]["allowed"]:
+            reason = response["result"].get("reason", "Denied")
+            return AuthorizationOutcome(False, reason)
+
+        reason = response["result"].get("reason", "Ok")
+        return AuthorizationOutcome(True, reason)
+
+
+def includeme(config: Configurator):
+    if os.getenv("PAASTA_API_AUTH_ENDPOINT"):
+        config.add_tween(
+            "paasta_tools.api.tweens.auth.AuthTweenFactory",
+            under=(
+                pyramid.tweens.INGRESS,
+                "paasta_tools.api.tweens.request_logger.request_logger_tween_factory",
+            ),
+        )
diff --git a/requirements.txt b/requirements.txt
index 9887bf9df6..f8c1a05e93 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -13,7 +13,7 @@ boto3-type-annotations==0.3.1
 botocore==1.34.22
 bravado==10.4.1
 bravado-core==5.12.1
-cachetools==2.0.1
+cachetools==5.5.0
 certifi==2017.11.5
 chardet==3.0.4
 choice==0.1
diff --git a/tests/api/tweens/test_auth.py b/tests/api/tweens/test_auth.py
new file mode 100644
index 0000000000..21ad97bafd
--- /dev/null
+++ b/tests/api/tweens/test_auth.py
@@ -0,0 +1,95 @@
+# Copyright 2015-2016 Yelp Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import os
+from unittest.mock import MagicMock
+from unittest.mock import patch
+
+import pytest
+from pyramid.httpexceptions import HTTPForbidden
+
+from paasta_tools.api.tweens import auth
+
+
+@pytest.fixture
+def mock_auth_tween():
+    with patch.dict(
+        os.environ,
+        {
+            "PAASTA_API_AUTH_ENFORCE": "1",
+            "PAASTA_API_AUTH_ENDPOINT": "http://localhost:31337",
+        },
+    ):
+        with patch("paasta_tools.api.tweens.auth.requests"):
+            yield auth.AuthTweenFactory(MagicMock(), MagicMock())
+
+
+def test_call(mock_auth_tween):
+    mock_request = MagicMock(
+        path="/something",
+        method="post",
+        headers={"Authorization": "Bearer aaa.bbb.ccc"},
+    )
+    with patch.object(mock_auth_tween, "is_request_authorized") as mock_is_authorized:
+        mock_is_authorized.return_value = auth.AuthorizationOutcome(True, "Ok")
+        mock_auth_tween(mock_request)
+        mock_is_authorized.assert_called_once_with("/something", "aaa.bbb.ccc", "post")
+        mock_auth_tween.handler.assert_called_once_with(mock_request)
+
+
+def test_call_deny(mock_auth_tween):
+    mock_request = MagicMock(
+        path="/something",
+        method="post",
+        headers={"Authorization": "Bearer aaa.bbb.ccc"},
+    )
+    with patch.object(mock_auth_tween, "is_request_authorized") as mock_is_authorized:
+        mock_is_authorized.return_value = auth.AuthorizationOutcome(False, "Denied")
+        response = mock_auth_tween(mock_request)
+        assert isinstance(response, HTTPForbidden)
+        assert response.headers.get("X-Auth-Failure-Reason") == "Denied"
+
+
+def test_is_request_authorized(mock_auth_tween):
+    mock_auth_tween.session.post.return_value.json.return_value = {
+        "result": {"allowed": True, "reason": "User allowed"}
+    }
+    assert mock_auth_tween.is_request_authorized(
+        "/allowed", "aaa.bbb.ccc", "get"
+    ) == auth.AuthorizationOutcome(True, "User allowed")
+    mock_auth_tween.session.post.assert_called_once_with(
+        url="http://localhost:31337",
+        json={
+            "input": {
+                "path": "/allowed",
+                "backend": "paasta",
+                "token": "aaa.bbb.ccc",
+                "method": "get",
+            }
+        },
+        timeout=2,
+    )
+
+
+def test_is_request_authorized_fail(mock_auth_tween):
+    mock_auth_tween.session.post.side_effect = Exception
+    assert mock_auth_tween.is_request_authorized(
+        "/allowed", "eee.ddd.fff", "get"
+    ) == auth.AuthorizationOutcome(False, "Auth backend error")
+
+
+def test_is_request_authorized_malformed(mock_auth_tween):
+    mock_auth_tween.session.post.return_value.json.return_value = {"foo": "bar"}
+    assert mock_auth_tween.is_request_authorized(
+        "/allowed", "eee.ddd.fff", "post"
+    ) == auth.AuthorizationOutcome(False, "Malformed auth response")

From d99aaf1c69901b7ea82a21bdcda7218aede846ac Mon Sep 17 00:00:00 2001
From: Matteo Piano <mpiano@yelp.com>
Date: Fri, 1 Nov 2024 09:18:07 -0700
Subject: [PATCH 03/13] allow passing auth token to API client

---
 paasta_tools/api/client.py | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/paasta_tools/api/client.py b/paasta_tools/api/client.py
index fc82dcc15c..7cde15d7df 100644
--- a/paasta_tools/api/client.py
+++ b/paasta_tools/api/client.py
@@ -48,6 +48,7 @@ def get_paasta_oapi_client_by_url(
     cert_file: Optional[str] = None,
     key_file: Optional[str] = None,
     ssl_ca_cert: Optional[str] = None,
+    auth_token: str = "",
 ) -> PaastaOApiClient:
     server_variables = dict(scheme=parsed_url.scheme, host=parsed_url.netloc)
     config = paastaapi.Configuration(
@@ -63,6 +64,9 @@ def get_paasta_oapi_client_by_url(
     client.rest_client.pool_manager.connection_pool_kw[
         "timeout"
     ] = load_system_paasta_config().get_api_client_timeout()
+    # SEC-19555: support auth in PaaSTA APIs
+    if auth_token:
+        client.set_default_header("Authorization", f"Bearer {auth_token}")
     return PaastaOApiClient(
         autoscaler=paastaapis.AutoscalerApi(client),
         default=paastaapis.DefaultApi(client),
@@ -79,6 +83,7 @@ def get_paasta_oapi_client(
     cluster: str = None,
     system_paasta_config: SystemPaastaConfig = None,
     http_res: bool = False,
+    auth_token: str = "",
 ) -> Optional[PaastaOApiClient]:
     if not system_paasta_config:
         system_paasta_config = load_system_paasta_config()
@@ -94,4 +99,6 @@ def get_paasta_oapi_client(
     parsed = urlparse(api_endpoints[cluster])
     cert_file = key_file = ssl_ca_cert = None
 
-    return get_paasta_oapi_client_by_url(parsed, cert_file, key_file, ssl_ca_cert)
+    return get_paasta_oapi_client_by_url(
+        parsed, cert_file, key_file, ssl_ca_cert, auth_token
+    )

From 18a72fb8dc80b8296062235bee1aaee11a0d3360 Mon Sep 17 00:00:00 2001
From: Quentin Long <qlo@yelp.com>
Date: Fri, 1 Nov 2024 17:24:21 -0700
Subject: [PATCH 04/13] More remote_run stuff

---
 paasta_tools/api/api_docs/oapi.yaml       | 22 ++++--
 paasta_tools/api/views/instance.py        |  8 ++-
 paasta_tools/cli/cmds/remote_run_2.py     | 81 +++++++++++++++++++++--
 paasta_tools/paasta_remote_run_2.py       | 80 +++++++++++++++++-----
 paasta_tools/paastaapi/api/service_api.py | 26 ++++----
 paasta_tools/paastaapi/models/__init__.py |  1 +
 6 files changed, 179 insertions(+), 39 deletions(-)

diff --git a/paasta_tools/api/api_docs/oapi.yaml b/paasta_tools/api/api_docs/oapi.yaml
index a5b4310684..400f627735 100644
--- a/paasta_tools/api/api_docs/oapi.yaml
+++ b/paasta_tools/api/api_docs/oapi.yaml
@@ -1630,6 +1630,22 @@ paths:
   /services/{service}/{instance}/remote_run:
     post:
       operationId: remote_run
+      requestBody:
+        content:
+          application/json:
+            schema:
+              type: object
+              properties:
+                interactive:
+                  type: bool
+                user:
+                  type: string
+                image:
+                  type: string
+              required:
+               - interactive
+               - user
+        required: true
       parameters:
       - description: Service name
         in: path
@@ -1643,12 +1659,6 @@ paths:
         required: true
         schema:
           type: string
-      - description: Username
-        in: query
-        name: user
-        required: true
-        schema:
-          type: string
       responses:
         "200":
           content:
diff --git a/paasta_tools/api/views/instance.py b/paasta_tools/api/views/instance.py
index 274fe782af..4c4e161bb6 100644
--- a/paasta_tools/api/views/instance.py
+++ b/paasta_tools/api/views/instance.py
@@ -395,9 +395,15 @@ def remote_run(request):
     service = request.swagger_data.get("service")
     instance = request.swagger_data.get("instance")
     user = request.swagger_data.get("user")
+    interactive = request.swagger_data.get("interactive", True)
+    recreate = request.swagger_data.get("recreate", True)
 
     try:
-        paasta_remote_run_2.remote_run_start(service, instance, user, settings.cluster)
+        response = paasta_remote_run_2.remote_run_start(
+            service, instance, user, settings.cluster, interactive, recreate
+        )
     except Exception:
         error_message = traceback.format_exc()
         raise ApiFailure(error_message, 500)
+
+    return response
diff --git a/paasta_tools/cli/cmds/remote_run_2.py b/paasta_tools/cli/cmds/remote_run_2.py
index dd380ccaf1..5896a5aedb 100644
--- a/paasta_tools/cli/cmds/remote_run_2.py
+++ b/paasta_tools/cli/cmds/remote_run_2.py
@@ -12,9 +12,19 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+import json
+import os
+import subprocess
+import sys
+import time
+
 from paasta_tools.api.client import get_paasta_oapi_client
+from paasta_tools.cli.cmds.check import makefile_responds_to
+from paasta_tools.cli.cmds.cook_image import paasta_cook_image
 from paasta_tools.cli.utils import get_paasta_oapi_api_clustername
 from paasta_tools.cli.utils import lazy_choices_completer
+from paasta_tools.utils import DEFAULT_SOA_DIR
+from paasta_tools.utils import get_username
 from paasta_tools.utils import list_services
 from paasta_tools.utils import load_system_paasta_config
 from paasta_tools.utils import PaastaColors
@@ -56,6 +66,31 @@ def add_subparser(
         help="Run stuff remotely.",
         description=("'paasta remote-run' runs stuff remotely "),
     )
+    remote_run_parser.add_argument(
+        "-b",
+        "--build",
+        dest="build",
+        help="Build the image from current directory",
+        action="store_true",
+    )
+    remote_run_parser.add_argument(
+        "-y",
+        "--yelpsoa-config-root",
+        dest="yelpsoa_config_root",
+        help="A directory from which yelpsoa-configs should be read from",
+        default=DEFAULT_SOA_DIR,
+    )
+    remote_run_parser.add_argument(
+        "-I",
+        "--interactive",
+        help=(
+            'Run container in interactive mode. If interactive is set the default command will be "bash" '
+            'unless otherwise set by the "--cmd" flag'
+        ),
+        action="store_true",
+        required=False,
+        default=False,
+    )
     add_common_args_to_parser(remote_run_parser)
     remote_run_parser.set_defaults(command=remote_run)
 
@@ -67,9 +102,23 @@ def paasta_remote_run(
     system_paasta_config: SystemPaastaConfig,
     verbose: int,
     is_eks: bool = False,
+    build: bool = False,
 ) -> int:
+
     output = []
     ret_code = 0
+
+    # TODO: Build
+    if build and not makefile_responds_to("cook-image"):
+        print(
+            "A local Makefile with a 'cook-image' target is required for --build",
+            file=sys.stderr,
+        )
+        default_tag = "paasta-remote-run-{}-{}".format(service, get_username())
+        os.environ["DOCKER_TAG"] = default_tag
+        paasta_cook_image(args=None, service=service, soa_dir=soa_dir)
+        # TODO Actually push the image
+
     client = get_paasta_oapi_client(
         cluster=get_paasta_oapi_api_clustername(cluster=cluster, is_eks=is_eks),
         system_paasta_config=system_paasta_config,
@@ -79,11 +128,16 @@ def paasta_remote_run(
         exit(1)
 
     try:
+        # TODO add image argument if build
         response = client.service.remote_run(
-            service=service, instance=instance, user="qlo"
+            service=service,
+            instance=instance,
+            user=get_username(),
         )
-        print(response)
+        print("Reponse was: ", response)
+        response = json.loads(response)
     except client.api_error as exc:
+        print(exc, file=sys.stderr)
         output.append(PaastaColors.red(exc.reason))
         ret_code = exc.status
     except (client.connection_error, client.timeout_error) as exc:
@@ -96,14 +150,31 @@ def paasta_remote_run(
         output.append(str(e))
         ret_code = 1
 
-    print("\n".join(output))
+    if ret_code:
+        print("\n".join(output))
+        return ret_code
+
+    pod_name, namespace = response["pod_name"], response["namespace"]
+    exec_command_tmpl = "kubectl{eks}-{cluster} exec -it -n {namespace} {pod} /bin/bash"
+    exec_command = exec_command_tmpl.format(
+        eks="-eks" if is_eks else "", cluster=cluster, namespace=namespace, pod=pod_name
+    )
+    print("Pod launched successfully:", pod_name)
+
+    # TODO figure out how to get this to work
+    # print('Attaching shell')
+    # cmd = subprocess.Popen(exec_command.split(' '))
+    print("Run the following command to enter your service pod")
+    print(exec_command)
 
     return ret_code
 
 
 def remote_run(args) -> int:
     """Run stuff, but remotely!"""
-    system_paasta_config = load_system_paasta_config()
-    paasta_remote_run(
+    system_paasta_config = load_system_paasta_config(
+        "/nail/home/qlo/paasta_config/paasta/"
+    )
+    return paasta_remote_run(
         args.cluster, args.service, args.instance, system_paasta_config, 1, False
     )
diff --git a/paasta_tools/paasta_remote_run_2.py b/paasta_tools/paasta_remote_run_2.py
index ed60a73aa7..0c60d70d0f 100755
--- a/paasta_tools/paasta_remote_run_2.py
+++ b/paasta_tools/paasta_remote_run_2.py
@@ -1,5 +1,8 @@
+import json
 from time import sleep
 
+from kubernetes.client.exceptions import ApiException
+
 from paasta_tools.kubernetes.application.controller_wrappers import (
     get_application_wrapper,
 )
@@ -8,44 +11,91 @@
 from paasta_tools.utils import DEFAULT_SOA_DIR
 
 
-def remote_run_start(service, instance, user, cluster):
-    kube_client = KubeClient()
+def remote_run_start(service, instance, user, cluster, interactive, recreate):
+    # TODO Overriding the kube client config for now as the api has limited permissions
+    kube_client = KubeClient(config_file="/etc/kubernetes/admin.conf")
+
+    # TODO hardcoded for now
     is_eks = False
+
+    # Load the service deployment settings
     deployment = load_kubernetes_service_config_no_cache(
         service, instance, cluster, DEFAULT_SOA_DIR
     )
     namespace = deployment.get_namespace()
 
+    # Set to interactive mode
+    if interactive:
+        deployment.config_dict["cmd"] = "sleep 604800"  # One week
+
+    # Create the app with a new name
     formatted_application = deployment.format_kubernetes_app()
     formatted_application.metadata.name += f"-remote-run-{user}"
-    pod_name = formatted_application.metadata.name
+    deployment_name = formatted_application.metadata.name
     app_wrapper = get_application_wrapper(formatted_application)
     app_wrapper.load_local_config(DEFAULT_SOA_DIR, cluster, is_eks)
-    app_wrapper.create(kube_client)
 
+    # Launch pod
+    try:
+        app_wrapper.create(kube_client)
+    except ApiException as e:
+        if e.status == 409:
+            # Deployment already running
+            if recreate:
+                app_wrapper.deep_delete(kube_client)
+                wait_until_deployment_gone(kube_client, namespace, deployment_name)
+                app_wrapper.create(kube_client)
+            else:
+                raise
+
+    pod = wait_until_pod_running(kube_client, namespace, deployment_name)
+
+    return json.dumps(
+        {"status": "success", "pod_name": pod.metadata.name, "namespace": namespace}
+    )
+
+
+def wait_until_deployment_gone(kube_client, namespace, deployment_name):
+    for retry in range(10):
+        pod = find_pod(kube_client, namespace, deployment_name, 1)
+        if not pod:
+            return
+        sleep(5)
+    raise Exception("Pod still exists!")
+
+
+def find_pod(kube_client, namespace, deployment_name, retries=5):
     # Get pod status and name
-    for retry in range(5):
+    for retry in range(retries):
         pod_list = kube_client.core.list_namespaced_pod(namespace)
         matching_pod = None
         for pod in pod_list.items:
-            if pod.metadata.name.startswith(pod_name):
+            if pod.metadata.name.startswith(deployment_name):
                 matching_pod = pod
                 break
 
         if not matching_pod:
-            sleep(1)
+            sleep(2)
             continue
+        return matching_pod
+    return None
 
+
+def wait_until_pod_running(kube_client, namespace, deployment_name):
+    for retry in range(5):
+        pod = find_pod(kube_client, namespace, deployment_name)
+        if not pod:
+            raise Exception("No matching pod!")
         if pod.status.phase == "Running":
             break
-        elif pod.status.phase != "Initializing":
+        elif pod.status.phase not in ("Initializing", "Pending"):
             raise Exception(f"Pod state is {pod.status.phase}")
-
-    if not matching_pod:
-        raise Exception("No matching pod")
-
-    return {"Status": "Success!", "pod_name": pod.metadata.name, "namespace": namespace}
+    return pod
 
 
-def remote_run_stop():
-    pass
+# def remote_run_stop():
+# TODO Should this happen here or should the client kill the deployment directly?
+# Load the service deployment settings
+# deployment = load_kubernetes_service_config_no_cache(
+#    service, instance, cluster, DEFAULT_SOA_DIR
+# )
diff --git a/paasta_tools/paastaapi/api/service_api.py b/paasta_tools/paastaapi/api/service_api.py
index 7db7aec9fe..8050e75d2b 100644
--- a/paasta_tools/paastaapi/api/service_api.py
+++ b/paasta_tools/paastaapi/api/service_api.py
@@ -27,6 +27,7 @@
 from paasta_tools.paastaapi.model.flink_config import FlinkConfig
 from paasta_tools.paastaapi.model.flink_job_details import FlinkJobDetails
 from paasta_tools.paastaapi.model.flink_jobs import FlinkJobs
+from paasta_tools.paastaapi.model.inline_object1 import InlineObject1
 from paasta_tools.paastaapi.model.inline_response200 import InlineResponse200
 from paasta_tools.paastaapi.model.inline_response2001 import InlineResponse2001
 from paasta_tools.paastaapi.model.instance_bounce_status import InstanceBounceStatus
@@ -1303,7 +1304,7 @@ def __remote_run(
             self,
             service,
             instance,
-            user,
+            inline_object1,
             **kwargs
         ):
             """Launch a remote-run pod  # noqa: E501
@@ -1311,13 +1312,13 @@ def __remote_run(
             This method makes a synchronous HTTP request by default. To make an
             asynchronous HTTP request, please pass async_req=True
 
-            >>> thread = api.remote_run(service, instance, user, async_req=True)
+            >>> thread = api.remote_run(service, instance, inline_object1, async_req=True)
             >>> result = thread.get()
 
             Args:
                 service (str): Service name
                 instance (str): Instance name
-                user (str): Username
+                inline_object1 (InlineObject1):
 
             Keyword Args:
                 _return_http_data_only (bool): response data without head status
@@ -1368,8 +1369,8 @@ def __remote_run(
                 service
             kwargs['instance'] = \
                 instance
-            kwargs['user'] = \
-                user
+            kwargs['inline_object1'] = \
+                inline_object1
             return self.call_with_http_info(**kwargs)
 
         self.remote_run = Endpoint(
@@ -1385,12 +1386,12 @@ def __remote_run(
                 'all': [
                     'service',
                     'instance',
-                    'user',
+                    'inline_object1',
                 ],
                 'required': [
                     'service',
                     'instance',
-                    'user',
+                    'inline_object1',
                 ],
                 'nullable': [
                 ],
@@ -1409,18 +1410,17 @@ def __remote_run(
                         (str,),
                     'instance':
                         (str,),
-                    'user':
-                        (str,),
+                    'inline_object1':
+                        (InlineObject1,),
                 },
                 'attribute_map': {
                     'service': 'service',
                     'instance': 'instance',
-                    'user': 'user',
                 },
                 'location_map': {
                     'service': 'path',
                     'instance': 'path',
-                    'user': 'query',
+                    'inline_object1': 'body',
                 },
                 'collection_format_map': {
                 }
@@ -1429,7 +1429,9 @@ def __remote_run(
                 'accept': [
                     'application/json'
                 ],
-                'content_type': [],
+                'content_type': [
+                    'application/json'
+                ]
             },
             api_client=api_client,
             callable=__remote_run
diff --git a/paasta_tools/paastaapi/models/__init__.py b/paasta_tools/paastaapi/models/__init__.py
index 0d1278ee23..bda54f7889 100644
--- a/paasta_tools/paastaapi/models/__init__.py
+++ b/paasta_tools/paastaapi/models/__init__.py
@@ -26,6 +26,7 @@
 from paasta_tools.paastaapi.model.float_and_error import FloatAndError
 from paasta_tools.paastaapi.model.hpa_metric import HPAMetric
 from paasta_tools.paastaapi.model.inline_object import InlineObject
+from paasta_tools.paastaapi.model.inline_object1 import InlineObject1
 from paasta_tools.paastaapi.model.inline_response200 import InlineResponse200
 from paasta_tools.paastaapi.model.inline_response2001 import InlineResponse2001
 from paasta_tools.paastaapi.model.instance_bounce_status import InstanceBounceStatus

From c6d4c20fbe0fc8c98d4d038e644287f259982895 Mon Sep 17 00:00:00 2001
From: Quentin Long <qlo@yelp.com>
Date: Tue, 5 Nov 2024 19:54:03 -0800
Subject: [PATCH 05/13] change Deployment to Job

---
 paasta_tools/api/api.py                       |   4 +-
 paasta_tools/api/api_docs/oapi.yaml           |  83 ++++++++++-
 paasta_tools/api/api_docs/swagger.json        |  96 +++++++++++-
 paasta_tools/api/views/instance.py            |  16 +-
 .../application/controller_wrappers.py        |  58 +++++++-
 paasta_tools/kubernetes_tools.py              |  87 ++++++++++-
 paasta_tools/paasta_remote_run_2.py           |  45 +++---
 paasta_tools/paastaapi/api/service_api.py     | 138 ------------------
 paasta_tools/paastaapi/apis/__init__.py       |   1 +
 paasta_tools/paastaapi/models/__init__.py     |   1 +
 10 files changed, 358 insertions(+), 171 deletions(-)

diff --git a/paasta_tools/api/api.py b/paasta_tools/api/api.py
index a0f20d89c8..8108e78548 100644
--- a/paasta_tools/api/api.py
+++ b/paasta_tools/api/api.py
@@ -149,8 +149,8 @@ def make_app(global_config=None):
         "/v1/services/{service}/{instance}/tasks/{task_id}",
     )
     config.add_route(
-        "service.instance.remote_run",
-        "/v1/services/{service}/{instance}/remote_run",
+        "remote_run",
+        "/v1/remote_run/{service}/{instance}",
     )
     config.add_route("service.list", "/v1/services/{service}")
     config.add_route("services", "/v1/services")
diff --git a/paasta_tools/api/api_docs/oapi.yaml b/paasta_tools/api/api_docs/oapi.yaml
index 400f627735..56db1b467b 100644
--- a/paasta_tools/api/api_docs/oapi.yaml
+++ b/paasta_tools/api/api_docs/oapi.yaml
@@ -1627,9 +1627,9 @@ paths:
       summary: Get mesos task of service_name.instance_name by task_id
       tags:
       - service
-  /services/{service}/{instance}/remote_run:
+  /remote_run/{service}/{instance}/start:
     post:
-      operationId: remote_run
+      operationId: remote_run_start
       requestBody:
         content:
           application/json:
@@ -1672,7 +1672,84 @@ paths:
           description: Failure
       summary: Launch a remote-run pod
       tags:
-      - service
+      - remote_run
+  /remote_run/{service}/{instance}/stop:
+    post:
+      operationId: remote_run_sop
+      requestBody:
+        content:
+          application/json:
+            schema:
+              type: object
+              properties:
+                user:
+                  type: string
+              required:
+               - user
+        required: true
+      parameters:
+      - description: Service name
+        in: path
+        name: service
+        required: true
+        schema:
+          type: string
+      - description: Instance name
+        in: path
+        name: instance
+        required: true
+        schema:
+          type: string
+      responses:
+        "200":
+          content:
+            application/json:
+              schema:
+                type: string
+          description: Remote run pod stopped
+        "404":
+          description: Deployment key not found
+        "500":
+          description: Failure
+      summary: Stop a remote-run pod
+      tags:
+      - remote_run
+  /remote_run/{service}/{instance}/token:
+    get:
+      operationId: remote_run_token
+      parameters:
+      - description: Service name
+        in: path
+        name: service
+        required: true
+        schema:
+          type: string
+      - description: Instance name
+        in: path
+        name: instance
+        required: true
+        schema:
+          type: string
+      - description: User name
+        in: query
+        name: user
+        schema:
+          type: string
+        required: true
+      responses:
+        "200":
+          content:
+            application/json:
+              schema:
+                type: string
+          description: Token generated successfully
+        "404":
+          description: Deployment key not found
+        "500":
+          description: Failure
+      summary: Get a short lived token for exec into remote-run pod
+      tags:
+      - remote_run
   /version:
     get:
       operationId: showVersion
diff --git a/paasta_tools/api/api_docs/swagger.json b/paasta_tools/api/api_docs/swagger.json
index 7ee0a58740..4c4b4e42a6 100644
--- a/paasta_tools/api/api_docs/swagger.json
+++ b/paasta_tools/api/api_docs/swagger.json
@@ -847,7 +847,7 @@
                 ]
             }
         },
-        "/services/{service}/{instance}/remote_run": {
+        "/remote_run/{service}/{instance}/start": {
             "post": {
                 "responses": {
                     "200": {
@@ -864,7 +864,53 @@
                     }
                 },
                 "summary": "Do a remote run",
-                "operationId": "remote_run",
+                "operationId": "remote_run_start",
+                "tags": [
+                    "remote_run"
+                ],
+                "parameters": [
+                    {
+                        "in": "path",
+                        "description": "Service name",
+                        "name": "service",
+                        "required": true,
+                        "type": "string"
+                    },
+                    {
+                        "in": "path",
+                        "description": "Instance name",
+                        "name": "instance",
+                        "required": true,
+                        "type": "string"
+                    },
+                    {
+                        "in": "query",
+                        "description": "Username",
+                        "name": "user",
+                        "required": true,
+                        "type": "string"
+                    }
+                ]
+            }
+        },
+        "/remote_run/{service}/{instance}/stop": {
+            "post": {
+                "responses": {
+                    "200": {
+                        "description": "It worked!",
+                        "schema": {
+                            "type": "string"
+                        }
+                    },
+                    "404": {
+                        "description": "Deployment key not found"
+                    },
+                    "500": {
+                        "description": "Failure"
+                    }
+                },
+                "summary": "Stop a remote run",
+                "operationId": "remote_run_stop",
                 "tags": [
                     "service"
                 ],
@@ -892,6 +938,52 @@
                     }
                 ]
             }
+        },
+        "/remote_run/{service}/{instance}/token": {
+            "get": {
+                "responses": {
+                    "200": {
+                        "description": "It worked!",
+                        "schema": {
+                            "type": "string"
+                        }
+                    },
+                    "404": {
+                        "description": "Deployment key not found"
+                    },
+                    "500": {
+                        "description": "Failure"
+                    }
+                },
+                "summary": "Get a remote run token",
+                "operationId": "remote_run_token",
+                "tags": [
+                    "remote_run"
+                ],
+                "parameters": [
+                    {
+                        "in": "path",
+                        "description": "Service name",
+                        "name": "service",
+                        "required": true,
+                        "type": "string"
+                    },
+                    {
+                        "in": "path",
+                        "description": "Instance name",
+                        "name": "instance",
+                        "required": true,
+                        "type": "string"
+                    },
+                    {
+                        "in": "query",
+                        "description": "Username",
+                        "name": "user",
+                        "required": true,
+                        "type": "string"
+                    }
+                ]
+            }
         }
     },
     "definitions": {
diff --git a/paasta_tools/api/views/instance.py b/paasta_tools/api/views/instance.py
index 4c4e161bb6..224fd8fd4c 100644
--- a/paasta_tools/api/views/instance.py
+++ b/paasta_tools/api/views/instance.py
@@ -388,10 +388,18 @@ def instance_mesh_status(request):
     return instance_mesh
 
 
-@view_config(
-    route_name="service.instance.remote_run", request_method="POST", renderer="json"
-)
-def remote_run(request):
+@view_config(route_name="remote_run.stop", request_method="POST", renderer="json")
+def remote_run_stop(request):
+    pass
+
+
+@view_config(route_name="remote_run.token", request_method="POST", renderer="json")
+def remote_run_token(request):
+    pass
+
+
+@view_config(route_name="remote_run.start", request_method="POST", renderer="json")
+def remote_run_start(request):
     service = request.swagger_data.get("service")
     instance = request.swagger_data.get("instance")
     user = request.swagger_data.get("user")
diff --git a/paasta_tools/kubernetes/application/controller_wrappers.py b/paasta_tools/kubernetes/application/controller_wrappers.py
index 21026120f8..2069871e69 100644
--- a/paasta_tools/kubernetes/application/controller_wrappers.py
+++ b/paasta_tools/kubernetes/application/controller_wrappers.py
@@ -7,12 +7,14 @@
 from kubernetes.client import V1beta1PodDisruptionBudget
 from kubernetes.client import V1DeleteOptions
 from kubernetes.client import V1Deployment
+from kubernetes.client import V1Job
 from kubernetes.client import V1StatefulSet
 from kubernetes.client.rest import ApiException
 
 from paasta_tools.autoscaling.autoscaling_service_lib import autoscaling_is_paused
 from paasta_tools.eks_tools import load_eks_service_config_no_cache
 from paasta_tools.kubernetes_tools import create_deployment
+from paasta_tools.kubernetes_tools import create_job
 from paasta_tools.kubernetes_tools import create_pod_disruption_budget
 from paasta_tools.kubernetes_tools import create_stateful_set
 from paasta_tools.kubernetes_tools import ensure_service_account
@@ -51,7 +53,6 @@ def __init__(
                 "config_sha",
             ]
         }
-
         replicas = (
             item.spec.replicas
             if item.metadata.labels.get(paasta_prefixed("autoscaled"), "false")
@@ -411,14 +412,67 @@ def update(self, kube_client: KubeClient):
         )
 
 
+class JobWrapper(Application):
+    def __init__(
+        self,
+        item: V1Job,
+        logging=logging.getLogger(__name__),
+    ):
+        item.spec.replicas = None
+        super().__init__(item, logging)
+
+    def deep_delete(self, kube_client: KubeClient) -> None:
+        """
+        Remove all controllers, pods, and pod disruption budgets related to this application
+        :param kube_client:
+        """
+        delete_options = V1DeleteOptions(propagation_policy="Foreground")
+        try:
+            kube_client.batchess.delete_namespaced_job(
+                self.item.metadata.name,
+                self.item.metadata.namespace,
+                body=delete_options,
+            )
+        except ApiException as e:
+            if e.status == 404:
+                # Job does not exist, nothing to delete but
+                # we can consider this a success.
+                self.logging.debug(
+                    "not deleting nonexistent job/{} from namespace/{}".format(
+                        self.item.metadata.name, self.item.metadata.namespace
+                    )
+                )
+            else:
+                raise
+        else:
+            self.logging.info(
+                "deleted job/{} from namespace/{}".format(
+                    self.item.metadata.name, self.item.metadata.namespace
+                )
+            )
+
+    def create(self, kube_client: KubeClient):
+        create_job(
+            kube_client=kube_client,
+            formatted_job=self.item,
+            namespace=self.soa_config.get_namespace(),
+        )
+
+    def update(self, kube_client: KubeClient):
+        # TODO: Is this needed?
+        pass
+
+
 def get_application_wrapper(
-    formatted_application: Union[V1Deployment, V1StatefulSet]
+    formatted_application: Union[V1Deployment, V1StatefulSet, V1Job]
 ) -> Application:
     app: Application
     if isinstance(formatted_application, V1Deployment):
         app = DeploymentWrapper(formatted_application)
     elif isinstance(formatted_application, V1StatefulSet):
         app = StatefulSetWrapper(formatted_application)
+    elif isinstance(formatted_application, V1Job):
+        app = JobWrapper(formatted_application)
     else:
         raise Exception("Unknown kubernetes object to update")
 
diff --git a/paasta_tools/kubernetes_tools.py b/paasta_tools/kubernetes_tools.py
index a4a6f43392..51124637d8 100644
--- a/paasta_tools/kubernetes_tools.py
+++ b/paasta_tools/kubernetes_tools.py
@@ -48,6 +48,7 @@
 from humanfriendly import parse_size
 from kubernetes import client as kube_client
 from kubernetes import config as kube_config
+from kubernetes.client import AuthenticationV1TokenRequest
 from kubernetes.client import CoreV1Event
 from kubernetes.client import models
 from kubernetes.client import V1Affinity
@@ -71,6 +72,8 @@
 from kubernetes.client import V1ExecAction
 from kubernetes.client import V1HostPathVolumeSource
 from kubernetes.client import V1HTTPGetAction
+from kubernetes.client import V1Job
+from kubernetes.client import V1JobSpec
 from kubernetes.client import V1KeyToPath
 from kubernetes.client import V1LabelSelector
 from kubernetes.client import V1Lifecycle
@@ -113,6 +116,7 @@
 from kubernetes.client import V1StatefulSetSpec
 from kubernetes.client import V1Subject
 from kubernetes.client import V1TCPSocketAction
+from kubernetes.client import V1TokenRequestSpec
 from kubernetes.client import V1TopologySpreadConstraint
 from kubernetes.client import V1Volume
 from kubernetes.client import V1VolumeMount
@@ -605,6 +609,7 @@ def __init__(
         self.core = kube_client.CoreV1Api(self.api_client)
         self.policy = kube_client.PolicyV1beta1Api(self.api_client)
         self.apiextensions = kube_client.ApiextensionsV1Api(self.api_client)
+        self.batches = kube_client.BatchV1Api(self.api_client)
 
         self.custom = kube_client.CustomObjectsApi(self.api_client)
         self.autoscaling = kube_client.AutoscalingV2beta2Api(self.api_client)
@@ -2029,6 +2034,62 @@ def get_pod_management_policy(self) -> str:
         """Get sts pod_management_policy from config, default to 'OrderedReady'"""
         return self.config_dict.get("pod_management_policy", "OrderedReady")
 
+    def format_as_kubernetes_job(self) -> V1Job:
+        """Create the config for launching the deployment as a Job"""
+        try:
+            docker_url = self.get_docker_url()
+            git_sha = get_git_sha_from_dockerurl(docker_url, long=True)
+            system_paasta_config = load_system_paasta_config()
+            complete_config = V1Job(
+                api_version="batch/v1",
+                kind="Job",
+                metadata=self.get_kubernetes_metadata(git_sha),
+                spec=V1JobSpec(
+                    active_deadline_seconds=3600,
+                    selector=V1LabelSelector(
+                        match_labels={
+                            "paasta.yelp.com/service": self.get_service(),
+                            "paasta.yelp.com/instance": self.get_instance(),
+                        }
+                    ),
+                    template=self.get_pod_template_spec(
+                        git_sha=git_sha, system_paasta_config=system_paasta_config
+                    ),
+                ),
+            )
+
+            prometheus_shard = self.get_prometheus_shard()
+            if prometheus_shard:
+                complete_config.metadata.labels[
+                    "paasta.yelp.com/prometheus_shard"
+                ] = prometheus_shard
+
+            image_version = self.get_image_version()
+            if image_version is not None:
+                complete_config.metadata.labels[
+                    "paasta.yelp.com/image_version"
+                ] = image_version
+
+            complete_config.metadata.labels["paasta.yelp.com/remote_run"] = "true"
+            # DO NOT ADD LABELS AFTER THIS LINE
+            config_hash = get_config_hash(
+                self.sanitize_for_config_hash(complete_config),
+                force_bounce=self.get_force_bounce(),
+            )
+            complete_config.metadata.labels["yelp.com/paasta_config_sha"] = config_hash
+            complete_config.metadata.labels["paasta.yelp.com/config_sha"] = config_hash
+
+            complete_config.spec.template.metadata.labels[
+                "yelp.com/paasta_config_sha"
+            ] = config_hash
+            complete_config.spec.template.metadata.labels[
+                "paasta.yelp.com/config_sha"
+            ] = config_hash
+        except Exception as e:
+            raise InvalidKubernetesConfig(e, self.get_service(), self.get_instance())
+        log.debug("Complete configuration for instance is: %s", complete_config)
+        return complete_config
+
     def format_kubernetes_app(self) -> Union[V1Deployment, V1StatefulSet]:
         """Create the configuration that will be passed to the Kubernetes REST API."""
 
@@ -2517,7 +2578,7 @@ def sanitize_for_config_hash(
 
         # remove data we dont want used to hash configs
         # replica count
-        if ahash["spec"] is not None:
+        if ahash["spec"] is not None and "replicas" in ahash["spec"]:
             del ahash["spec"]["replicas"]
 
         if ahash["metadata"] is not None:
@@ -3631,6 +3692,16 @@ def create_stateful_set(
     )
 
 
+def create_job(
+    kube_client: KubeClient,
+    formatted_job: V1Job,
+    namespace: str,
+) -> None:
+    return kube_client.batches.create_namespaced_job(
+        namespace=namespace, body=formatted_job
+    )
+
+
 def update_stateful_set(
     kube_client: KubeClient,
     formatted_stateful_set: V1StatefulSet,
@@ -4354,6 +4425,20 @@ def get_kubernetes_secret_env_variables(
     return decrypted_secrets
 
 
+def create_temp_exec_token(kube_client: KubeClient, namespace: str, user: str):
+    """Create a short lived token for exec"""
+    audience = "remote-run-" + user
+    service_account = "default"
+    token_spec = V1TokenRequestSpec(
+        expiration_seconds=600, audiences=[audience]  # minimum allowed by k8s
+    )
+    request = AuthenticationV1TokenRequest(spec=token_spec)
+    response = kube_client.core.create_namespaced_service_account_token(
+        service_account, namespace, request
+    )
+    return response
+
+
 def get_kubernetes_secret_volumes(
     kube_client: KubeClient,
     secret_volumes_config: Sequence[SecretVolume],
diff --git a/paasta_tools/paasta_remote_run_2.py b/paasta_tools/paasta_remote_run_2.py
index 0c60d70d0f..e2f49caf2a 100755
--- a/paasta_tools/paasta_remote_run_2.py
+++ b/paasta_tools/paasta_remote_run_2.py
@@ -6,11 +6,22 @@
 from paasta_tools.kubernetes.application.controller_wrappers import (
     get_application_wrapper,
 )
+from paasta_tools.kubernetes_tools import create_temp_exec_token
 from paasta_tools.kubernetes_tools import KubeClient
 from paasta_tools.kubernetes_tools import load_kubernetes_service_config_no_cache
 from paasta_tools.utils import DEFAULT_SOA_DIR
 
 
+def create_exec_token(namespace):
+    """Creates a short lived token for execing into a pod"""
+    kube_client = KubeClient(config_file="/etc/kubernetes/admin.conf")
+    try:
+        token = create_temp_exec_token(kube_client, namespace, user)
+    except ApiException as E:
+        raise
+    return token["token"]
+
+
 def remote_run_start(service, instance, user, cluster, interactive, recreate):
     # TODO Overriding the kube client config for now as the api has limited permissions
     kube_client = KubeClient(config_file="/etc/kubernetes/admin.conf")
@@ -29,10 +40,10 @@ def remote_run_start(service, instance, user, cluster, interactive, recreate):
         deployment.config_dict["cmd"] = "sleep 604800"  # One week
 
     # Create the app with a new name
-    formatted_application = deployment.format_kubernetes_app()
-    formatted_application.metadata.name += f"-remote-run-{user}"
-    deployment_name = formatted_application.metadata.name
-    app_wrapper = get_application_wrapper(formatted_application)
+    formatted_job = deployment.format_as_kubernetes_job()
+    formatted_job.metadata.name = f"remote-run-{user}-{formatted_job.metadata.name}"
+    job_name = formatted_job.metadata.name
+    app_wrapper = get_application_wrapper(formatted_job)
     app_wrapper.load_local_config(DEFAULT_SOA_DIR, cluster, is_eks)
 
     # Launch pod
@@ -40,37 +51,33 @@ def remote_run_start(service, instance, user, cluster, interactive, recreate):
         app_wrapper.create(kube_client)
     except ApiException as e:
         if e.status == 409:
-            # Deployment already running
-            if recreate:
-                app_wrapper.deep_delete(kube_client)
-                wait_until_deployment_gone(kube_client, namespace, deployment_name)
-                app_wrapper.create(kube_client)
-            else:
-                raise
+            # Job already running
+            return json.dumps({"status": "409", "reason": "Job already running"})
+        raise
 
-    pod = wait_until_pod_running(kube_client, namespace, deployment_name)
+    pod = wait_until_pod_running(kube_client, namespace, job_name)
 
     return json.dumps(
-        {"status": "success", "pod_name": pod.metadata.name, "namespace": namespace}
+        {"status": "200", "pod_name": pod.metadata.name, "namespace": namespace}
     )
 
 
-def wait_until_deployment_gone(kube_client, namespace, deployment_name):
+def wait_until_deployment_gone(kube_client, namespace, job_name):
     for retry in range(10):
-        pod = find_pod(kube_client, namespace, deployment_name, 1)
+        pod = find_pod(kube_client, namespace, job_name, 1)
         if not pod:
             return
         sleep(5)
     raise Exception("Pod still exists!")
 
 
-def find_pod(kube_client, namespace, deployment_name, retries=5):
+def find_pod(kube_client, namespace, job_name, retries=5):
     # Get pod status and name
     for retry in range(retries):
         pod_list = kube_client.core.list_namespaced_pod(namespace)
         matching_pod = None
         for pod in pod_list.items:
-            if pod.metadata.name.startswith(deployment_name):
+            if pod.metadata.name.startswith(job_name):
                 matching_pod = pod
                 break
 
@@ -81,9 +88,9 @@ def find_pod(kube_client, namespace, deployment_name, retries=5):
     return None
 
 
-def wait_until_pod_running(kube_client, namespace, deployment_name):
+def wait_until_pod_running(kube_client, namespace, job_name):
     for retry in range(5):
-        pod = find_pod(kube_client, namespace, deployment_name)
+        pod = find_pod(kube_client, namespace, job_name)
         if not pod:
             raise Exception("No matching pod!")
         if pod.status.phase == "Running":
diff --git a/paasta_tools/paastaapi/api/service_api.py b/paasta_tools/paastaapi/api/service_api.py
index 8050e75d2b..63901f7cee 100644
--- a/paasta_tools/paastaapi/api/service_api.py
+++ b/paasta_tools/paastaapi/api/service_api.py
@@ -27,7 +27,6 @@
 from paasta_tools.paastaapi.model.flink_config import FlinkConfig
 from paasta_tools.paastaapi.model.flink_job_details import FlinkJobDetails
 from paasta_tools.paastaapi.model.flink_jobs import FlinkJobs
-from paasta_tools.paastaapi.model.inline_object1 import InlineObject1
 from paasta_tools.paastaapi.model.inline_response200 import InlineResponse200
 from paasta_tools.paastaapi.model.inline_response2001 import InlineResponse2001
 from paasta_tools.paastaapi.model.instance_bounce_status import InstanceBounceStatus
@@ -1300,143 +1299,6 @@ def __mesh_instance(
             callable=__mesh_instance
         )
 
-        def __remote_run(
-            self,
-            service,
-            instance,
-            inline_object1,
-            **kwargs
-        ):
-            """Launch a remote-run pod  # noqa: E501
-
-            This method makes a synchronous HTTP request by default. To make an
-            asynchronous HTTP request, please pass async_req=True
-
-            >>> thread = api.remote_run(service, instance, inline_object1, async_req=True)
-            >>> result = thread.get()
-
-            Args:
-                service (str): Service name
-                instance (str): Instance name
-                inline_object1 (InlineObject1):
-
-            Keyword Args:
-                _return_http_data_only (bool): response data without head status
-                    code and headers. Default is True.
-                _preload_content (bool): if False, the urllib3.HTTPResponse object
-                    will be returned without reading/decoding response data.
-                    Default is True.
-                _request_timeout (float/tuple): timeout setting for this request. If one
-                    number provided, it will be total request timeout. It can also
-                    be a pair (tuple) of (connection, read) timeouts.
-                    Default is None.
-                _check_input_type (bool): specifies if type checking
-                    should be done one the data sent to the server.
-                    Default is True.
-                _check_return_type (bool): specifies if type checking
-                    should be done one the data received from the server.
-                    Default is True.
-                _host_index (int/None): specifies the index of the server
-                    that we want to use.
-                    Default is read from the configuration.
-                async_req (bool): execute request asynchronously
-
-            Returns:
-                str
-                    If the method is called asynchronously, returns the request
-                    thread.
-            """
-            kwargs['async_req'] = kwargs.get(
-                'async_req', False
-            )
-            kwargs['_return_http_data_only'] = kwargs.get(
-                '_return_http_data_only', True
-            )
-            kwargs['_preload_content'] = kwargs.get(
-                '_preload_content', True
-            )
-            kwargs['_request_timeout'] = kwargs.get(
-                '_request_timeout', None
-            )
-            kwargs['_check_input_type'] = kwargs.get(
-                '_check_input_type', True
-            )
-            kwargs['_check_return_type'] = kwargs.get(
-                '_check_return_type', True
-            )
-            kwargs['_host_index'] = kwargs.get('_host_index')
-            kwargs['service'] = \
-                service
-            kwargs['instance'] = \
-                instance
-            kwargs['inline_object1'] = \
-                inline_object1
-            return self.call_with_http_info(**kwargs)
-
-        self.remote_run = Endpoint(
-            settings={
-                'response_type': (str,),
-                'auth': [],
-                'endpoint_path': '/services/{service}/{instance}/remote_run',
-                'operation_id': 'remote_run',
-                'http_method': 'POST',
-                'servers': None,
-            },
-            params_map={
-                'all': [
-                    'service',
-                    'instance',
-                    'inline_object1',
-                ],
-                'required': [
-                    'service',
-                    'instance',
-                    'inline_object1',
-                ],
-                'nullable': [
-                ],
-                'enum': [
-                ],
-                'validation': [
-                ]
-            },
-            root_map={
-                'validations': {
-                },
-                'allowed_values': {
-                },
-                'openapi_types': {
-                    'service':
-                        (str,),
-                    'instance':
-                        (str,),
-                    'inline_object1':
-                        (InlineObject1,),
-                },
-                'attribute_map': {
-                    'service': 'service',
-                    'instance': 'instance',
-                },
-                'location_map': {
-                    'service': 'path',
-                    'instance': 'path',
-                    'inline_object1': 'body',
-                },
-                'collection_format_map': {
-                }
-            },
-            headers_map={
-                'accept': [
-                    'application/json'
-                ],
-                'content_type': [
-                    'application/json'
-                ]
-            },
-            api_client=api_client,
-            callable=__remote_run
-        )
-
         def __status_instance(
             self,
             service,
diff --git a/paasta_tools/paastaapi/apis/__init__.py b/paasta_tools/paastaapi/apis/__init__.py
index a93cebf52c..ab815d8c70 100644
--- a/paasta_tools/paastaapi/apis/__init__.py
+++ b/paasta_tools/paastaapi/apis/__init__.py
@@ -17,5 +17,6 @@
 # Import APIs into API package:
 from paasta_tools.paastaapi.api.autoscaler_api import AutoscalerApi
 from paasta_tools.paastaapi.api.default_api import DefaultApi
+from paasta_tools.paastaapi.api.remote_run_api import RemoteRunApi
 from paasta_tools.paastaapi.api.resources_api import ResourcesApi
 from paasta_tools.paastaapi.api.service_api import ServiceApi
diff --git a/paasta_tools/paastaapi/models/__init__.py b/paasta_tools/paastaapi/models/__init__.py
index bda54f7889..2a8cbf44b2 100644
--- a/paasta_tools/paastaapi/models/__init__.py
+++ b/paasta_tools/paastaapi/models/__init__.py
@@ -27,6 +27,7 @@
 from paasta_tools.paastaapi.model.hpa_metric import HPAMetric
 from paasta_tools.paastaapi.model.inline_object import InlineObject
 from paasta_tools.paastaapi.model.inline_object1 import InlineObject1
+from paasta_tools.paastaapi.model.inline_object2 import InlineObject2
 from paasta_tools.paastaapi.model.inline_response200 import InlineResponse200
 from paasta_tools.paastaapi.model.inline_response2001 import InlineResponse2001
 from paasta_tools.paastaapi.model.instance_bounce_status import InstanceBounceStatus

From 0b2e0827081c2caf5f0337de0e9e8caa6b3614e7 Mon Sep 17 00:00:00 2001
From: Quentin Long <qlo@yelp.com>
Date: Wed, 6 Nov 2024 16:24:58 -0800
Subject: [PATCH 06/13] Ensure service account

---
 paasta_tools/api/api.py                       | 12 +++-
 paasta_tools/api/api_docs/oapi.yaml           |  2 +-
 .../application/controller_wrappers.py        |  6 +-
 paasta_tools/kubernetes_tools.py              | 64 +++++++++++++------
 4 files changed, 55 insertions(+), 29 deletions(-)

diff --git a/paasta_tools/api/api.py b/paasta_tools/api/api.py
index 8108e78548..0f3ba149a2 100644
--- a/paasta_tools/api/api.py
+++ b/paasta_tools/api/api.py
@@ -149,8 +149,16 @@ def make_app(global_config=None):
         "/v1/services/{service}/{instance}/tasks/{task_id}",
     )
     config.add_route(
-        "remote_run",
-        "/v1/remote_run/{service}/{instance}",
+        "remote_run_start",
+        "/v1/remote_run/{service}/{instance}/start",
+    )
+    config.add_route(
+        "remote_run_stop",
+        "/v1/remote_run/{service}/{instance}/stop",
+    )
+    config.add_route(
+        "remote_run_token",
+        "/v1/remote_run/{service}/{instance}/token",
     )
     config.add_route("service.list", "/v1/services/{service}")
     config.add_route("services", "/v1/services")
diff --git a/paasta_tools/api/api_docs/oapi.yaml b/paasta_tools/api/api_docs/oapi.yaml
index 56db1b467b..b70e434fab 100644
--- a/paasta_tools/api/api_docs/oapi.yaml
+++ b/paasta_tools/api/api_docs/oapi.yaml
@@ -1675,7 +1675,7 @@ paths:
       - remote_run
   /remote_run/{service}/{instance}/stop:
     post:
-      operationId: remote_run_sop
+      operationId: remote_run_stop
       requestBody:
         content:
           application/json:
diff --git a/paasta_tools/kubernetes/application/controller_wrappers.py b/paasta_tools/kubernetes/application/controller_wrappers.py
index 2069871e69..09d7d90ec4 100644
--- a/paasta_tools/kubernetes/application/controller_wrappers.py
+++ b/paasta_tools/kubernetes/application/controller_wrappers.py
@@ -428,7 +428,7 @@ def deep_delete(self, kube_client: KubeClient) -> None:
         """
         delete_options = V1DeleteOptions(propagation_policy="Foreground")
         try:
-            kube_client.batchess.delete_namespaced_job(
+            kube_client.batches.delete_namespaced_job(
                 self.item.metadata.name,
                 self.item.metadata.namespace,
                 body=delete_options,
@@ -458,10 +458,6 @@ def create(self, kube_client: KubeClient):
             namespace=self.soa_config.get_namespace(),
         )
 
-    def update(self, kube_client: KubeClient):
-        # TODO: Is this needed?
-        pass
-
 
 def get_application_wrapper(
     formatted_application: Union[V1Deployment, V1StatefulSet, V1Job]
diff --git a/paasta_tools/kubernetes_tools.py b/paasta_tools/kubernetes_tools.py
index 51124637d8..90b97afc4e 100644
--- a/paasta_tools/kubernetes_tools.py
+++ b/paasta_tools/kubernetes_tools.py
@@ -2046,12 +2046,6 @@ def format_as_kubernetes_job(self) -> V1Job:
                 metadata=self.get_kubernetes_metadata(git_sha),
                 spec=V1JobSpec(
                     active_deadline_seconds=3600,
-                    selector=V1LabelSelector(
-                        match_labels={
-                            "paasta.yelp.com/service": self.get_service(),
-                            "paasta.yelp.com/instance": self.get_instance(),
-                        }
-                    ),
                     template=self.get_pod_template_spec(
                         git_sha=git_sha, system_paasta_config=system_paasta_config
                     ),
@@ -2071,23 +2065,11 @@ def format_as_kubernetes_job(self) -> V1Job:
                 ] = image_version
 
             complete_config.metadata.labels["paasta.yelp.com/remote_run"] = "true"
-            # DO NOT ADD LABELS AFTER THIS LINE
-            config_hash = get_config_hash(
-                self.sanitize_for_config_hash(complete_config),
-                force_bounce=self.get_force_bounce(),
-            )
-            complete_config.metadata.labels["yelp.com/paasta_config_sha"] = config_hash
-            complete_config.metadata.labels["paasta.yelp.com/config_sha"] = config_hash
-
-            complete_config.spec.template.metadata.labels[
-                "yelp.com/paasta_config_sha"
-            ] = config_hash
-            complete_config.spec.template.metadata.labels[
-                "paasta.yelp.com/config_sha"
-            ] = config_hash
         except Exception as e:
             raise InvalidKubernetesConfig(e, self.get_service(), self.get_instance())
-        log.debug("Complete configuration for instance is: %s", complete_config)
+        log.debug(
+            "Complete configuration for remote-run instance is: %s", complete_config
+        )
         return complete_config
 
     def format_kubernetes_app(self) -> Union[V1Deployment, V1StatefulSet]:
@@ -2847,6 +2829,7 @@ def ensure_namespace(kube_client: KubeClient, namespace: str) -> None:
             else:
                 raise
 
+    ensure_paasta_remote_run_service_account(kube_client, namespace)
     ensure_paasta_api_rolebinding(kube_client, namespace)
     ensure_paasta_namespace_limits(kube_client, namespace)
 
@@ -2880,6 +2863,45 @@ def ensure_paasta_api_rolebinding(kube_client: KubeClient, namespace: str) -> No
         )
 
 
+def ensure_paasta_remote_run_service_account(
+    kube_client: KubeClient, namespace: str
+) -> None:
+    """Checks whether a ServiceAccount named paasta-remote-run exists in the namespace,
+    and if it doesn't, create it and bind paasta-remote-run-role to it."""
+    service_accounts = get_all_service_accounts(kube_client, namespace=namespace)
+    service_account_names = [item.metadata.name for item in service_accounts]
+    if "paasta-remote-run" not in service_account_names:
+        log.warning(
+            f"Creating service account paasta-remote-run in {namespace} namespace as it does not exist"
+        )
+        service_account = V1ServiceAccount(
+            metadata=V1ObjectMeta(name="paasta-remote-run", namespace=namespace)
+        )
+        role_binding = V1RoleBinding(
+            metadata=V1ObjectMeta(
+                name="paasta-remote-run-role",
+                namespace=namespace,
+            ),
+            role_ref=V1RoleRef(
+                api_group="rbac.authorization.k8s.io",
+                kind="ClusterRole",
+                name="paasta-remote-run-role",
+            ),
+            subjects=[
+                V1Subject(
+                    kind="ServiceAccount",
+                    name="paasta-remote-run",
+                ),
+            ],
+        )
+        kube_client.core.create_namespaced_service_account(
+            namespace=namespace, body=service_account
+        )
+        kube_client.rbac.create_namespaced_role_binding(
+            namespace=namespace, body=role_binding
+        )
+
+
 def ensure_paasta_namespace_limits(kube_client: KubeClient, namespace: str) -> None:
     if not namespace.startswith("paastasvc-"):
         log.debug(

From 7474f8e9842a80eb1101a342fc336b1ce29abfbd Mon Sep 17 00:00:00 2001
From: Quentin Long <qlo@yelp.com>
Date: Wed, 6 Nov 2024 18:46:47 -0800
Subject: [PATCH 07/13] Service account, role binding, token

---
 paasta_tools/api/api.py                |  6 +++---
 paasta_tools/api/api_docs/oapi.yaml    |  2 +-
 paasta_tools/api/api_docs/swagger.json | 23 ++++++++++++++++++++---
 paasta_tools/api/client.py             |  2 ++
 paasta_tools/api/views/instance.py     | 21 ++++++++++++++++-----
 paasta_tools/cli/cmds/remote_run_2.py  | 19 +++++++++++++------
 paasta_tools/kubernetes_tools.py       | 13 +++++++++----
 paasta_tools/paasta_remote_run_2.py    |  9 +++++++--
 8 files changed, 71 insertions(+), 24 deletions(-)

diff --git a/paasta_tools/api/api.py b/paasta_tools/api/api.py
index 0f3ba149a2..5c2cd2f7f4 100644
--- a/paasta_tools/api/api.py
+++ b/paasta_tools/api/api.py
@@ -149,15 +149,15 @@ def make_app(global_config=None):
         "/v1/services/{service}/{instance}/tasks/{task_id}",
     )
     config.add_route(
-        "remote_run_start",
+        "remote_run.start",
         "/v1/remote_run/{service}/{instance}/start",
     )
     config.add_route(
-        "remote_run_stop",
+        "remote_run.stop",
         "/v1/remote_run/{service}/{instance}/stop",
     )
     config.add_route(
-        "remote_run_token",
+        "remote_run.token",
         "/v1/remote_run/{service}/{instance}/token",
     )
     config.add_route("service.list", "/v1/services/{service}")
diff --git a/paasta_tools/api/api_docs/oapi.yaml b/paasta_tools/api/api_docs/oapi.yaml
index b70e434fab..f3aefc7fec 100644
--- a/paasta_tools/api/api_docs/oapi.yaml
+++ b/paasta_tools/api/api_docs/oapi.yaml
@@ -1637,7 +1637,7 @@ paths:
               type: object
               properties:
                 interactive:
-                  type: bool
+                  type: boolean
                 user:
                   type: string
                 image:
diff --git a/paasta_tools/api/api_docs/swagger.json b/paasta_tools/api/api_docs/swagger.json
index 4c4b4e42a6..0f5244bdc3 100644
--- a/paasta_tools/api/api_docs/swagger.json
+++ b/paasta_tools/api/api_docs/swagger.json
@@ -884,11 +884,28 @@
                         "type": "string"
                     },
                     {
-                        "in": "query",
+                        "in": "body",
                         "description": "Username",
-                        "name": "user",
+                        "name": "json_body",
                         "required": true,
-                        "type": "string"
+                        "schema": {
+                            "type": "object",
+                            "properties": {
+                                "interactive": {
+                                    "type": "boolean"
+                                },
+                                "user": {
+                                    "type": "string"
+                                },
+                                "image": {
+                                    "type": "string"
+                                }
+                            },
+                            "required": [
+                                "interactive",
+                                "user"
+                            ]
+                        }
                     }
                 ]
             }
diff --git a/paasta_tools/api/client.py b/paasta_tools/api/client.py
index fc82dcc15c..3358d4649f 100644
--- a/paasta_tools/api/client.py
+++ b/paasta_tools/api/client.py
@@ -37,6 +37,7 @@ class PaastaOApiClient:
     default: paastaapis.DefaultApi
     resources: paastaapis.ResourcesApi
     service: paastaapis.ServiceApi
+    remote_run: paastaapis.RemoteRunApi
     api_error: Type[paastaapi.ApiException]
     connection_error: Type[paastaapi.ApiException]
     timeout_error: Type[paastaapi.ApiException]
@@ -68,6 +69,7 @@ def get_paasta_oapi_client_by_url(
         default=paastaapis.DefaultApi(client),
         resources=paastaapis.ResourcesApi(client),
         service=paastaapis.ServiceApi(client),
+        remote_run=paastaapis.RemoteRunApi(client),
         api_error=paastaapi.ApiException,
         connection_error=paastaapi.ApiException,
         timeout_error=paastaapi.ApiException,
diff --git a/paasta_tools/api/views/instance.py b/paasta_tools/api/views/instance.py
index 224fd8fd4c..c9c3b243b6 100644
--- a/paasta_tools/api/views/instance.py
+++ b/paasta_tools/api/views/instance.py
@@ -16,6 +16,7 @@
 PaaSTA service instance status/start/stop etc.
 """
 import asyncio
+import json
 import logging
 import re
 import traceback
@@ -393,18 +394,28 @@ def remote_run_stop(request):
     pass
 
 
-@view_config(route_name="remote_run.token", request_method="POST", renderer="json")
+@view_config(route_name="remote_run.token", request_method="GET", renderer="json")
 def remote_run_token(request):
-    pass
+    service = request.swagger_data.get("service")
+    instance = request.swagger_data.get("instance")
+    user = request.swagger_data.get("user")
+    try:
+        token = paasta_remote_run_2.create_exec_token(
+            service, instance, user, settings.cluster
+        )
+    except:
+        error_message = traceback.format_exc()
+        raise ApiFailure(error_message, 500)
+    return json.dumps({"token": token})
 
 
 @view_config(route_name="remote_run.start", request_method="POST", renderer="json")
 def remote_run_start(request):
     service = request.swagger_data.get("service")
     instance = request.swagger_data.get("instance")
-    user = request.swagger_data.get("user")
-    interactive = request.swagger_data.get("interactive", True)
-    recreate = request.swagger_data.get("recreate", True)
+    user = request.swagger_data["json_body"].get("user")
+    interactive = request.swagger_data["json_body"].get("interactive", True)
+    recreate = request.swagger_data["json_body"].get("recreate", True)
 
     try:
         response = paasta_remote_run_2.remote_run_start(
diff --git a/paasta_tools/cli/cmds/remote_run_2.py b/paasta_tools/cli/cmds/remote_run_2.py
index 5896a5aedb..a2e2965909 100644
--- a/paasta_tools/cli/cmds/remote_run_2.py
+++ b/paasta_tools/cli/cmds/remote_run_2.py
@@ -126,14 +126,13 @@ def paasta_remote_run(
     if not client:
         print("Cannot get a paasta-api client")
         exit(1)
-
+    response = client.remote_run.remote_run_start(
+        service,
+        instance,
+        {"user": get_username(), "interactive": True},
+    )
     try:
         # TODO add image argument if build
-        response = client.service.remote_run(
-            service=service,
-            instance=instance,
-            user=get_username(),
-        )
         print("Reponse was: ", response)
         response = json.loads(response)
     except client.api_error as exc:
@@ -167,6 +166,14 @@ def paasta_remote_run(
     print("Run the following command to enter your service pod")
     print(exec_command)
 
+    print("Getting token")
+    try:
+        token = client.remote_run.remote_run_token(
+            service=service, instance=instance, user="qlo"
+        )
+        print("Got token:", token)
+    except:
+        raise
     return ret_code
 
 
diff --git a/paasta_tools/kubernetes_tools.py b/paasta_tools/kubernetes_tools.py
index 90b97afc4e..b74be1f599 100644
--- a/paasta_tools/kubernetes_tools.py
+++ b/paasta_tools/kubernetes_tools.py
@@ -2047,7 +2047,9 @@ def format_as_kubernetes_job(self) -> V1Job:
                 spec=V1JobSpec(
                     active_deadline_seconds=3600,
                     template=self.get_pod_template_spec(
-                        git_sha=git_sha, system_paasta_config=system_paasta_config
+                        git_sha=git_sha,
+                        system_paasta_config=system_paasta_config,
+                        restart=False,
                     ),
                 ),
             )
@@ -2182,7 +2184,10 @@ def has_routable_ip(
         return "false"
 
     def get_pod_template_spec(
-        self, git_sha: str, system_paasta_config: SystemPaastaConfig
+        self,
+        git_sha: str,
+        system_paasta_config: SystemPaastaConfig,
+        restart: bool = True,
     ) -> V1PodTemplateSpec:
         service_namespace_config = load_service_namespace_config(
             service=self.service, namespace=self.get_nerve_namespace()
@@ -2221,7 +2226,7 @@ def get_pod_template_spec(
             ),
             share_process_namespace=True,
             node_selector=self.get_node_selector(),
-            restart_policy="Always",
+            restart_policy="Always" if restart else "Never",
             volumes=self.get_pod_volumes(
                 docker_volumes=docker_volumes + hacheck_sidecar_volumes,
                 aws_ebs_volumes=self.get_aws_ebs_volumes(),
@@ -4450,7 +4455,7 @@ def get_kubernetes_secret_env_variables(
 def create_temp_exec_token(kube_client: KubeClient, namespace: str, user: str):
     """Create a short lived token for exec"""
     audience = "remote-run-" + user
-    service_account = "default"
+    service_account = "paasta-remote-run"
     token_spec = V1TokenRequestSpec(
         expiration_seconds=600, audiences=[audience]  # minimum allowed by k8s
     )
diff --git a/paasta_tools/paasta_remote_run_2.py b/paasta_tools/paasta_remote_run_2.py
index e2f49caf2a..4553ab0b49 100755
--- a/paasta_tools/paasta_remote_run_2.py
+++ b/paasta_tools/paasta_remote_run_2.py
@@ -12,14 +12,19 @@
 from paasta_tools.utils import DEFAULT_SOA_DIR
 
 
-def create_exec_token(namespace):
+def create_exec_token(service, instance, user, cluster):
     """Creates a short lived token for execing into a pod"""
     kube_client = KubeClient(config_file="/etc/kubernetes/admin.conf")
+    # Load the service deployment settings
+    deployment = load_kubernetes_service_config_no_cache(
+        service, instance, cluster, DEFAULT_SOA_DIR
+    )
+    namespace = deployment.get_namespace()
     try:
         token = create_temp_exec_token(kube_client, namespace, user)
     except ApiException as E:
         raise
-    return token["token"]
+    return token.status.token
 
 
 def remote_run_start(service, instance, user, cluster, interactive, recreate):

From 732d369b3c944b47f6a4824fb262e3ee8105b073 Mon Sep 17 00:00:00 2001
From: Quentin Long <qlo@yelp.com>
Date: Fri, 8 Nov 2024 18:58:34 -0800
Subject: [PATCH 08/13] Minor change to token generation

---
 paasta_tools/cli/cmds/remote_run_2.py | 28 ++++++++++++++-------------
 paasta_tools/kubernetes_tools.py      |  3 +--
 paasta_tools/paasta_remote_run_2.py   |  2 +-
 3 files changed, 17 insertions(+), 16 deletions(-)

diff --git a/paasta_tools/cli/cmds/remote_run_2.py b/paasta_tools/cli/cmds/remote_run_2.py
index a2e2965909..8e6a1323e3 100644
--- a/paasta_tools/cli/cmds/remote_run_2.py
+++ b/paasta_tools/cli/cmds/remote_run_2.py
@@ -154,26 +154,28 @@ def paasta_remote_run(
         return ret_code
 
     pod_name, namespace = response["pod_name"], response["namespace"]
-    exec_command_tmpl = "kubectl{eks}-{cluster} exec -it -n {namespace} {pod} /bin/bash"
-    exec_command = exec_command_tmpl.format(
-        eks="-eks" if is_eks else "", cluster=cluster, namespace=namespace, pod=pod_name
-    )
     print("Pod launched successfully:", pod_name)
 
-    # TODO figure out how to get this to work
-    # print('Attaching shell')
-    # cmd = subprocess.Popen(exec_command.split(' '))
-    print("Run the following command to enter your service pod")
-    print(exec_command)
-
-    print("Getting token")
     try:
         token = client.remote_run.remote_run_token(
             service=service, instance=instance, user="qlo"
         )
-        print("Got token:", token)
+        token = json.loads(token)["token"]
     except:
         raise
+
+    # TODO figure out how to get this to work
+    exec_command_tmpl = "kubectl{eks}-{cluster} --token {token} exec -it -n {namespace} {pod} -- /bin/bash"
+    exec_command = exec_command_tmpl.format(
+        eks="-eks" if is_eks else "",
+        cluster=cluster,
+        namespace=namespace,
+        pod=pod_name,
+        token=token,
+    )
+    print("Running command", exec_command)
+    # cmd = subprocess.Popen(exec_command.split(' '))
+
     return ret_code
 
 
@@ -183,5 +185,5 @@ def remote_run(args) -> int:
         "/nail/home/qlo/paasta_config/paasta/"
     )
     return paasta_remote_run(
-        args.cluster, args.service, args.instance, system_paasta_config, 1, False
+        args.cluster, args.service, args.instance, system_paasta_config, 1, True
     )
diff --git a/paasta_tools/kubernetes_tools.py b/paasta_tools/kubernetes_tools.py
index b74be1f599..5a8ff26777 100644
--- a/paasta_tools/kubernetes_tools.py
+++ b/paasta_tools/kubernetes_tools.py
@@ -4454,10 +4454,9 @@ def get_kubernetes_secret_env_variables(
 
 def create_temp_exec_token(kube_client: KubeClient, namespace: str, user: str):
     """Create a short lived token for exec"""
-    audience = "remote-run-" + user
     service_account = "paasta-remote-run"
     token_spec = V1TokenRequestSpec(
-        expiration_seconds=600, audiences=[audience]  # minimum allowed by k8s
+        expiration_seconds=600, audiences=[]  # minimum allowed by k8s
     )
     request = AuthenticationV1TokenRequest(spec=token_spec)
     response = kube_client.core.create_namespaced_service_account_token(
diff --git a/paasta_tools/paasta_remote_run_2.py b/paasta_tools/paasta_remote_run_2.py
index 4553ab0b49..7f7d6fdaf2 100755
--- a/paasta_tools/paasta_remote_run_2.py
+++ b/paasta_tools/paasta_remote_run_2.py
@@ -32,7 +32,7 @@ def remote_run_start(service, instance, user, cluster, interactive, recreate):
     kube_client = KubeClient(config_file="/etc/kubernetes/admin.conf")
 
     # TODO hardcoded for now
-    is_eks = False
+    is_eks = True
 
     # Load the service deployment settings
     deployment = load_kubernetes_service_config_no_cache(

From 926b76d7ac60feeefdb4448a7d387e2515602d98 Mon Sep 17 00:00:00 2001
From: Quentin Long <qlo@yelp.com>
Date: Fri, 8 Nov 2024 19:01:33 -0800
Subject: [PATCH 09/13] eks support

---
 paasta_tools/paasta_remote_run_2.py | 24 ++++++++++++++++++------
 1 file changed, 18 insertions(+), 6 deletions(-)

diff --git a/paasta_tools/paasta_remote_run_2.py b/paasta_tools/paasta_remote_run_2.py
index 7f7d6fdaf2..572851d802 100755
--- a/paasta_tools/paasta_remote_run_2.py
+++ b/paasta_tools/paasta_remote_run_2.py
@@ -3,6 +3,7 @@
 
 from kubernetes.client.exceptions import ApiException
 
+from paasta_tools.eks_tools import load_eks_service_config_no_cache
 from paasta_tools.kubernetes.application.controller_wrappers import (
     get_application_wrapper,
 )
@@ -15,10 +16,16 @@
 def create_exec_token(service, instance, user, cluster):
     """Creates a short lived token for execing into a pod"""
     kube_client = KubeClient(config_file="/etc/kubernetes/admin.conf")
+    is_eks = True
     # Load the service deployment settings
-    deployment = load_kubernetes_service_config_no_cache(
-        service, instance, cluster, DEFAULT_SOA_DIR
-    )
+    if is_eks:
+        deployment = load_eks_service_config_no_cache(
+            service, instance, cluster, DEFAULT_SOA_DIR
+        )
+    else:
+        deployment = load_kubernetes_service_config_no_cache(
+            service, instance, cluster, DEFAULT_SOA_DIR
+        )
     namespace = deployment.get_namespace()
     try:
         token = create_temp_exec_token(kube_client, namespace, user)
@@ -35,9 +42,14 @@ def remote_run_start(service, instance, user, cluster, interactive, recreate):
     is_eks = True
 
     # Load the service deployment settings
-    deployment = load_kubernetes_service_config_no_cache(
-        service, instance, cluster, DEFAULT_SOA_DIR
-    )
+    if is_eks:
+        deployment = load_eks_service_config_no_cache(
+            service, instance, cluster, DEFAULT_SOA_DIR
+        )
+    else:
+        deployment = load_kubernetes_service_config_no_cache(
+            service, instance, cluster, DEFAULT_SOA_DIR
+        )
     namespace = deployment.get_namespace()
 
     # Set to interactive mode

From a5cc802beb95885e115f1d6d4636a08d79bece5b Mon Sep 17 00:00:00 2001
From: Quentin Long <qlo@yelp.com>
Date: Mon, 11 Nov 2024 18:34:53 -0800
Subject: [PATCH 10/13] pty.spawn, stop

---
 paasta_tools/api/api_docs/swagger.json | 16 ++++-
 paasta_tools/api/views/instance.py     | 13 +++-
 paasta_tools/cli/cmds/remote_run_2.py  | 90 ++++++++++++++++++++------
 paasta_tools/paasta_remote_run_2.py    | 32 ++++++---
 4 files changed, 118 insertions(+), 33 deletions(-)

diff --git a/paasta_tools/api/api_docs/swagger.json b/paasta_tools/api/api_docs/swagger.json
index 0f5244bdc3..5fe2063367 100644
--- a/paasta_tools/api/api_docs/swagger.json
+++ b/paasta_tools/api/api_docs/swagger.json
@@ -947,11 +947,21 @@
                         "type": "string"
                     },
                     {
-                        "in": "query",
+                        "in": "body",
                         "description": "Username",
-                        "name": "user",
+                        "name": "json_body",
                         "required": true,
-                        "type": "string"
+                        "schema": {
+                            "type": "object",
+                            "properties": {
+                                "user": {
+                                    "type": "string"
+                                }
+                            },
+                            "required": [
+                                "user"
+                            ]
+                        }
                     }
                 ]
             }
diff --git a/paasta_tools/api/views/instance.py b/paasta_tools/api/views/instance.py
index c9c3b243b6..9be299ab88 100644
--- a/paasta_tools/api/views/instance.py
+++ b/paasta_tools/api/views/instance.py
@@ -391,7 +391,18 @@ def instance_mesh_status(request):
 
 @view_config(route_name="remote_run.stop", request_method="POST", renderer="json")
 def remote_run_stop(request):
-    pass
+    service = request.swagger_data.get("service")
+    instance = request.swagger_data.get("instance")
+    user = request.swagger_data["json_body"].get("user")
+
+    try:
+        response = paasta_remote_run_2.remote_run_stop(
+            service, instance, user, settings.cluster
+        )
+    except:
+        error_message = traceback.format_exc()
+        raise ApiFailure(error_message, 500)
+    return response
 
 
 @view_config(route_name="remote_run.token", request_method="GET", renderer="json")
diff --git a/paasta_tools/cli/cmds/remote_run_2.py b/paasta_tools/cli/cmds/remote_run_2.py
index 8e6a1323e3..67cac4aa20 100644
--- a/paasta_tools/cli/cmds/remote_run_2.py
+++ b/paasta_tools/cli/cmds/remote_run_2.py
@@ -14,7 +14,7 @@
 # limitations under the License.
 import json
 import os
-import subprocess
+import pty
 import sys
 import time
 
@@ -66,21 +66,27 @@ def add_subparser(
         help="Run stuff remotely.",
         description=("'paasta remote-run' runs stuff remotely "),
     )
-    remote_run_parser.add_argument(
+    subparsers = remote_run_parser.add_subparsers(dest="remote_run_command")
+    start_parser = subparsers.add_parser(
+        "start",
+        help="Start or connect to a remote-run job",
+        description=("Starts or connects to a remote-run-job"),
+    )
+    start_parser.add_argument(
         "-b",
         "--build",
         dest="build",
         help="Build the image from current directory",
         action="store_true",
     )
-    remote_run_parser.add_argument(
+    start_parser.add_argument(
         "-y",
         "--yelpsoa-config-root",
         dest="yelpsoa_config_root",
         help="A directory from which yelpsoa-configs should be read from",
         default=DEFAULT_SOA_DIR,
     )
-    remote_run_parser.add_argument(
+    start_parser.add_argument(
         "-I",
         "--interactive",
         help=(
@@ -91,20 +97,27 @@ def add_subparser(
         required=False,
         default=False,
     )
-    add_common_args_to_parser(remote_run_parser)
+    stop_parser = subparsers.add_parser(
+        "stop",
+        help="Stop! In the name of Paasta",
+        description="Stop your remote-run job if it exists",
+    )
+    add_common_args_to_parser(start_parser)
+    add_common_args_to_parser(stop_parser)
     remote_run_parser.set_defaults(command=remote_run)
 
 
-def paasta_remote_run(
-    cluster: str,
-    service: str,
-    instance: str,
+def paasta_remote_run_start(
+    args,
     system_paasta_config: SystemPaastaConfig,
-    verbose: int,
-    is_eks: bool = False,
-    build: bool = False,
+    verbose: int = 0,
+    is_eks: bool = True,
 ) -> int:
 
+    cluster = args.cluster
+    service = args.service
+    instance = args.instance
+    build = args.build
     output = []
     ret_code = 0
 
@@ -126,14 +139,14 @@ def paasta_remote_run(
     if not client:
         print("Cannot get a paasta-api client")
         exit(1)
+
+    # TODO add image argument if build
     response = client.remote_run.remote_run_start(
         service,
         instance,
         {"user": get_username(), "interactive": True},
     )
     try:
-        # TODO add image argument if build
-        print("Reponse was: ", response)
         response = json.loads(response)
     except client.api_error as exc:
         print(exc, file=sys.stderr)
@@ -153,8 +166,15 @@ def paasta_remote_run(
         print("\n".join(output))
         return ret_code
 
+    if response["status"] == 409:
+        print(
+            "A remote-run container already exists. Run remote-run stop first if you'd like a new one."
+        )
+        attach = input("Would you like to attach to it? y/n ")
+        if attach == "n":
+            return 0
+    print(response)
     pod_name, namespace = response["pod_name"], response["namespace"]
-    print("Pod launched successfully:", pod_name)
 
     try:
         token = client.remote_run.remote_run_token(
@@ -164,7 +184,6 @@ def paasta_remote_run(
     except:
         raise
 
-    # TODO figure out how to get this to work
     exec_command_tmpl = "kubectl{eks}-{cluster} --token {token} exec -it -n {namespace} {pod} -- /bin/bash"
     exec_command = exec_command_tmpl.format(
         eks="-eks" if is_eks else "",
@@ -173,17 +192,46 @@ def paasta_remote_run(
         pod=pod_name,
         token=token,
     )
-    print("Running command", exec_command)
-    # cmd = subprocess.Popen(exec_command.split(' '))
+    cmd = pty.spawn(exec_command.split(" "))
 
     return ret_code
 
 
+def paasta_remote_run_stop(
+    args,
+    system_paasta_config: SystemPaastaConfig,
+    verbose: int = 0,
+    is_eks: bool = True,
+) -> int:
+
+    cluster = args.cluster
+    service = args.service
+    instance = args.instance
+
+    ret_code = 0
+
+    client = get_paasta_oapi_client(
+        cluster=get_paasta_oapi_api_clustername(cluster=cluster, is_eks=is_eks),
+        system_paasta_config=system_paasta_config,
+    )
+    if not client:
+        print("Cannot get a paasta-api client")
+        exit(1)
+    response = client.remote_run.remote_run_stop(
+        service,
+        instance,
+        {"user": get_username()},
+    )
+    print(response)
+    return ret_code
+
+
 def remote_run(args) -> int:
     """Run stuff, but remotely!"""
     system_paasta_config = load_system_paasta_config(
         "/nail/home/qlo/paasta_config/paasta/"
     )
-    return paasta_remote_run(
-        args.cluster, args.service, args.instance, system_paasta_config, 1, True
-    )
+    if args.remote_run_command == "start":
+        return paasta_remote_run_start(args, system_paasta_config)
+    elif args.remote_run_command == "stop":
+        return paasta_remote_run_stop(args, system_paasta_config)
diff --git a/paasta_tools/paasta_remote_run_2.py b/paasta_tools/paasta_remote_run_2.py
index 572851d802..6daf6c04b5 100755
--- a/paasta_tools/paasta_remote_run_2.py
+++ b/paasta_tools/paasta_remote_run_2.py
@@ -64,18 +64,19 @@ def remote_run_start(service, instance, user, cluster, interactive, recreate):
     app_wrapper.load_local_config(DEFAULT_SOA_DIR, cluster, is_eks)
 
     # Launch pod
+    status = 200
     try:
         app_wrapper.create(kube_client)
     except ApiException as e:
         if e.status == 409:
             # Job already running
-            return json.dumps({"status": "409", "reason": "Job already running"})
+            status = 409
         raise
 
     pod = wait_until_pod_running(kube_client, namespace, job_name)
 
     return json.dumps(
-        {"status": "200", "pod_name": pod.metadata.name, "namespace": namespace}
+        {"status": status, "pod_name": pod.metadata.name, "namespace": namespace}
     )
 
 
@@ -117,9 +118,24 @@ def wait_until_pod_running(kube_client, namespace, job_name):
     return pod
 
 
-# def remote_run_stop():
-# TODO Should this happen here or should the client kill the deployment directly?
-# Load the service deployment settings
-# deployment = load_kubernetes_service_config_no_cache(
-#    service, instance, cluster, DEFAULT_SOA_DIR
-# )
+def remote_run_stop(service, instance, user, cluster):
+    # TODO Overriding the kube client config for now as the api has limited permissions
+    kube_client = KubeClient(config_file="/etc/kubernetes/admin.conf")
+    is_eks = True
+    if is_eks:
+        deployment = load_eks_service_config_no_cache(
+            service, instance, cluster, DEFAULT_SOA_DIR
+        )
+    else:
+        deployment = load_kubernetes_service_config_no_cache(
+            service, instance, cluster, DEFAULT_SOA_DIR
+        )
+    namespace = deployment.get_namespace()
+    formatted_job = deployment.format_as_kubernetes_job()
+    job_name = f"remote-run-{user}-{formatted_job.metadata.name}"
+    formatted_job.metadata.name = job_name
+
+    app_wrapper = get_application_wrapper(formatted_job)
+    app_wrapper.load_local_config(DEFAULT_SOA_DIR, cluster, is_eks)
+    app_wrapper.deep_delete(kube_client)
+    return json.dumps({"status": 200, "message": "Job successfully removed"})

From 5707d6cbe25442250d3c86c7b5c1eb947ed21406 Mon Sep 17 00:00:00 2001
From: Quentin Long <qlo@yelp.com>
Date: Mon, 11 Nov 2024 18:41:34 -0800
Subject: [PATCH 11/13] is_eks

---
 paasta_tools/api/views/instance.py  | 24 +++++++++++++++++++++---
 paasta_tools/paasta_remote_run_2.py | 12 ++++--------
 2 files changed, 25 insertions(+), 11 deletions(-)

diff --git a/paasta_tools/api/views/instance.py b/paasta_tools/api/views/instance.py
index 9be299ab88..c2a87ce794 100644
--- a/paasta_tools/api/views/instance.py
+++ b/paasta_tools/api/views/instance.py
@@ -394,10 +394,11 @@ def remote_run_stop(request):
     service = request.swagger_data.get("service")
     instance = request.swagger_data.get("instance")
     user = request.swagger_data["json_body"].get("user")
+    is_eks = is_instance_eks(service, instance)
 
     try:
         response = paasta_remote_run_2.remote_run_stop(
-            service, instance, user, settings.cluster
+            service, instance, user, settings.cluster, is_eks
         )
     except:
         error_message = traceback.format_exc()
@@ -410,9 +411,10 @@ def remote_run_token(request):
     service = request.swagger_data.get("service")
     instance = request.swagger_data.get("instance")
     user = request.swagger_data.get("user")
+    is_eks = is_instance_eks(service, instance)
     try:
         token = paasta_remote_run_2.create_exec_token(
-            service, instance, user, settings.cluster
+            service, instance, user, settings.cluster, is_eks
         )
     except:
         error_message = traceback.format_exc()
@@ -420,6 +422,21 @@ def remote_run_token(request):
     return json.dumps({"token": token})
 
 
+def is_instance_eks(service, instance):
+    try:
+        instance_type = validate_service_instance(
+            service, instance, settings.cluster, settings.soa_dir
+        )
+    except NoConfigurationForServiceError:
+        error_message = no_configuration_for_service_message(
+            settings.cluster,
+            service,
+            instance,
+        )
+        raise Exception(error_message, 404)
+    return instance_type == "eks"
+
+
 @view_config(route_name="remote_run.start", request_method="POST", renderer="json")
 def remote_run_start(request):
     service = request.swagger_data.get("service")
@@ -428,9 +445,10 @@ def remote_run_start(request):
     interactive = request.swagger_data["json_body"].get("interactive", True)
     recreate = request.swagger_data["json_body"].get("recreate", True)
 
+    is_eks = is_instance_eks(service, instance)
     try:
         response = paasta_remote_run_2.remote_run_start(
-            service, instance, user, settings.cluster, interactive, recreate
+            service, instance, user, settings.cluster, interactive, recreate, is_eks
         )
     except Exception:
         error_message = traceback.format_exc()
diff --git a/paasta_tools/paasta_remote_run_2.py b/paasta_tools/paasta_remote_run_2.py
index 6daf6c04b5..273e760a86 100755
--- a/paasta_tools/paasta_remote_run_2.py
+++ b/paasta_tools/paasta_remote_run_2.py
@@ -13,10 +13,10 @@
 from paasta_tools.utils import DEFAULT_SOA_DIR
 
 
-def create_exec_token(service, instance, user, cluster):
+def create_exec_token(service, instance, user, cluster, is_eks):
     """Creates a short lived token for execing into a pod"""
     kube_client = KubeClient(config_file="/etc/kubernetes/admin.conf")
-    is_eks = True
+
     # Load the service deployment settings
     if is_eks:
         deployment = load_eks_service_config_no_cache(
@@ -34,13 +34,10 @@ def create_exec_token(service, instance, user, cluster):
     return token.status.token
 
 
-def remote_run_start(service, instance, user, cluster, interactive, recreate):
+def remote_run_start(service, instance, user, cluster, interactive, recreate, is_eks):
     # TODO Overriding the kube client config for now as the api has limited permissions
     kube_client = KubeClient(config_file="/etc/kubernetes/admin.conf")
 
-    # TODO hardcoded for now
-    is_eks = True
-
     # Load the service deployment settings
     if is_eks:
         deployment = load_eks_service_config_no_cache(
@@ -118,10 +115,9 @@ def wait_until_pod_running(kube_client, namespace, job_name):
     return pod
 
 
-def remote_run_stop(service, instance, user, cluster):
+def remote_run_stop(service, instance, user, cluster, is_eks):
     # TODO Overriding the kube client config for now as the api has limited permissions
     kube_client = KubeClient(config_file="/etc/kubernetes/admin.conf")
-    is_eks = True
     if is_eks:
         deployment = load_eks_service_config_no_cache(
             service, instance, cluster, DEFAULT_SOA_DIR

From 9a302a28acfffbda1e40f41711af7070e8d84549 Mon Sep 17 00:00:00 2001
From: Quentin Long <qlo@yelp.com>
Date: Thu, 14 Nov 2024 18:33:41 -0800
Subject: [PATCH 12/13] Use pod specific roles and service accounts

---
 paasta_tools/kubernetes_tools.py    | 111 +++++++++++++++++-----------
 paasta_tools/paasta_remote_run_2.py |  12 ++-
 2 files changed, 80 insertions(+), 43 deletions(-)

diff --git a/paasta_tools/kubernetes_tools.py b/paasta_tools/kubernetes_tools.py
index 5a8ff26777..a557ecd686 100644
--- a/paasta_tools/kubernetes_tools.py
+++ b/paasta_tools/kubernetes_tools.py
@@ -19,6 +19,7 @@
 import math
 import os
 import re
+import time
 from datetime import datetime
 from datetime import timezone
 from enum import Enum
@@ -98,11 +99,13 @@
 from kubernetes.client import V1PodSecurityContext
 from kubernetes.client import V1PodSpec
 from kubernetes.client import V1PodTemplateSpec
+from kubernetes.client import V1PolicyRule
 from kubernetes.client import V1PreferredSchedulingTerm
 from kubernetes.client import V1Probe
 from kubernetes.client import V1ProjectedVolumeSource
 from kubernetes.client import V1ReplicaSet
 from kubernetes.client import V1ResourceRequirements
+from kubernetes.client import V1Role
 from kubernetes.client import V1RoleBinding
 from kubernetes.client import V1RoleRef
 from kubernetes.client import V1RollingUpdateDeployment
@@ -2834,7 +2837,6 @@ def ensure_namespace(kube_client: KubeClient, namespace: str) -> None:
             else:
                 raise
 
-    ensure_paasta_remote_run_service_account(kube_client, namespace)
     ensure_paasta_api_rolebinding(kube_client, namespace)
     ensure_paasta_namespace_limits(kube_client, namespace)
 
@@ -2868,45 +2870,6 @@ def ensure_paasta_api_rolebinding(kube_client: KubeClient, namespace: str) -> No
         )
 
 
-def ensure_paasta_remote_run_service_account(
-    kube_client: KubeClient, namespace: str
-) -> None:
-    """Checks whether a ServiceAccount named paasta-remote-run exists in the namespace,
-    and if it doesn't, create it and bind paasta-remote-run-role to it."""
-    service_accounts = get_all_service_accounts(kube_client, namespace=namespace)
-    service_account_names = [item.metadata.name for item in service_accounts]
-    if "paasta-remote-run" not in service_account_names:
-        log.warning(
-            f"Creating service account paasta-remote-run in {namespace} namespace as it does not exist"
-        )
-        service_account = V1ServiceAccount(
-            metadata=V1ObjectMeta(name="paasta-remote-run", namespace=namespace)
-        )
-        role_binding = V1RoleBinding(
-            metadata=V1ObjectMeta(
-                name="paasta-remote-run-role",
-                namespace=namespace,
-            ),
-            role_ref=V1RoleRef(
-                api_group="rbac.authorization.k8s.io",
-                kind="ClusterRole",
-                name="paasta-remote-run-role",
-            ),
-            subjects=[
-                V1Subject(
-                    kind="ServiceAccount",
-                    name="paasta-remote-run",
-                ),
-            ],
-        )
-        kube_client.core.create_namespaced_service_account(
-            namespace=namespace, body=service_account
-        )
-        kube_client.rbac.create_namespaced_role_binding(
-            namespace=namespace, body=role_binding
-        )
-
-
 def ensure_paasta_namespace_limits(kube_client: KubeClient, namespace: str) -> None:
     if not namespace.startswith("paastasvc-"):
         log.debug(
@@ -4452,9 +4415,10 @@ def get_kubernetes_secret_env_variables(
     return decrypted_secrets
 
 
-def create_temp_exec_token(kube_client: KubeClient, namespace: str, user: str):
+def create_temp_exec_token(
+    kube_client: KubeClient, namespace: str, service_account: str
+):
     """Create a short lived token for exec"""
-    service_account = "paasta-remote-run"
     token_spec = V1TokenRequestSpec(
         expiration_seconds=600, audiences=[]  # minimum allowed by k8s
     )
@@ -4560,3 +4524,66 @@ def add_volumes_for_authenticating_services(
     ):
         config_volumes = [token_config, *config_volumes]
     return config_volumes
+
+
+def create_pod_scoped_role(kube_client, namespace, user, pod_name):
+    pod_name_hash = hashlib.sha1(pod_name.encode("utf-8")).hexdigest()[:12]
+    policy = V1PolicyRule(
+        verbs=["create", "get"],
+        resources=["pods", "pods/exec"],
+        resource_names=[pod_name],
+        api_groups=[""],
+    )
+    role_name = f"remote-run-role-{pod_name_hash}"
+    role = V1Role(
+        rules=[policy],
+        metadata=V1ObjectMeta(
+            name=role_name,
+            labels={"CreationTime": str(int(time.time())), "PodOwner": user},
+        ),
+    )
+
+    kube_client.core.create_namespaced_role(namespace=namespace, body=role)
+    return role_name
+
+
+def bind_role_to_service_account(kube_client, namespace, service_account, role):
+    role_binding = V1RoleBinding(
+        metadata=V1ObjectMeta(
+            name=f"binding-{role}",
+            namespace=namespace,
+        ),
+        role_ref=V1RoleRef(
+            api_group="rbac.authorization.k8s.io",
+            kind="Role",
+            name=role,
+        ),
+        subjects=[
+            V1Subject(
+                kind="ServiceAccount",
+                name=service_account,
+            ),
+        ],
+    )
+    kube_client.rbac.create_namespaced_role_binding(
+        namespace=namespace, body=role_binding
+    )
+
+
+def create_paasta_remote_run_service_account(
+    kube_client: KubeClient, namespace: str, username: str, pod_name: str
+) -> None:
+    pod_name_hash = hashlib.sha1(pod_name.encode("utf-8")).hexdigest()[:12]
+    service_account_name = f"remote-run-{username}-{pod_name_hash}"
+    service_accounts = get_all_service_accounts(kube_client, namespace=namespace)
+    service_account_names = [item.metadata.name for item in service_accounts]
+    if service_account_name in service_account_names:
+        return service_account_name
+
+    service_account = V1ServiceAccount(
+        metadata=V1ObjectMeta(name=service_account_name, namespace=namespace)
+    )
+    kube_client.core.create_namespaced_service_account(
+        namespace=namespace, body=service_account
+    )
+    return service_account_name
diff --git a/paasta_tools/paasta_remote_run_2.py b/paasta_tools/paasta_remote_run_2.py
index 273e760a86..75cedb2d63 100755
--- a/paasta_tools/paasta_remote_run_2.py
+++ b/paasta_tools/paasta_remote_run_2.py
@@ -27,8 +27,18 @@ def create_exec_token(service, instance, user, cluster, is_eks):
             service, instance, cluster, DEFAULT_SOA_DIR
         )
     namespace = deployment.get_namespace()
+    formatted_job = deployment.format_as_kubernetes_job()
+    job_name = f"remote-run-{user}-{formatted_job.metadata.name}"
+
     try:
-        token = create_temp_exec_token(kube_client, namespace, user)
+        pod = find_pod(kube_client, namespace, job_name)
+        pod_name = pod.metadata.name
+        service_account = create_paasta_remote_run_service_account(
+            kubeclient, namespace, user, pod_name
+        )
+        role = create_pod_scoped_role(kube_client, namespace, pod_name, user)
+        bind_role_service_account(kube_client, namespace, service_account, role)
+        token = create_temp_exec_token(kube_client, namespace, service_account)
     except ApiException as E:
         raise
     return token.status.token

From 19f90ab4da5817abd12095d319d09d71512df89d Mon Sep 17 00:00:00 2001
From: Quentin Long <qlo@yelp.com>
Date: Mon, 18 Nov 2024 18:04:24 -0800
Subject: [PATCH 13/13] Removal of remote-run service accounts and roles

---
 paasta_tools/kubernetes_tools.py | 50 ++++++++++++++++++++++++++++++--
 1 file changed, 47 insertions(+), 3 deletions(-)

diff --git a/paasta_tools/kubernetes_tools.py b/paasta_tools/kubernetes_tools.py
index a557ecd686..cef7b239b0 100644
--- a/paasta_tools/kubernetes_tools.py
+++ b/paasta_tools/kubernetes_tools.py
@@ -19,8 +19,8 @@
 import math
 import os
 import re
-import time
 from datetime import datetime
+from datetime import timedelta
 from datetime import timezone
 from enum import Enum
 from functools import lru_cache
@@ -2839,6 +2839,7 @@ def ensure_namespace(kube_client: KubeClient, namespace: str) -> None:
 
     ensure_paasta_api_rolebinding(kube_client, namespace)
     ensure_paasta_namespace_limits(kube_client, namespace)
+    remove_remote_run_accounts_and_roles(kube_client, namespace)
 
 
 def ensure_paasta_api_rolebinding(kube_client: KubeClient, namespace: str) -> None:
@@ -4539,7 +4540,7 @@ def create_pod_scoped_role(kube_client, namespace, user, pod_name):
         rules=[policy],
         metadata=V1ObjectMeta(
             name=role_name,
-            labels={"CreationTime": str(int(time.time())), "PodOwner": user},
+            labels={"PodOwner": user},
         ),
     )
 
@@ -4581,9 +4582,52 @@ def create_paasta_remote_run_service_account(
         return service_account_name
 
     service_account = V1ServiceAccount(
-        metadata=V1ObjectMeta(name=service_account_name, namespace=namespace)
+        metadata=V1ObjectMeta(
+            name=service_account_name,
+            namespace=namespace,
+            labels={"PodOwner": username},
+        )
     )
     kube_client.core.create_namespaced_service_account(
         namespace=namespace, body=service_account
     )
     return service_account_name
+
+
+def get_namespaced_roles(kube_client, namespace):
+    return kube_client.rbac.list_namespaced_role(namespace).items
+
+
+def remove_remote_run_accounts_and_roles(
+    kube_client: KubeClient,
+    namespace: str,
+) -> None:
+    service_accounts = get_all_service_accounts(kube_client, namespace)
+    roles = get_namespaced_roles(kube_client, namespace)
+
+    current_time = datetime.utcnow().replace(tzinfo=timezone.utc)
+    age_limit = timedelta(seconds=60)
+
+    for delete_func, entity_list in (
+        (delete_namespaced_service_account, service_accounts),
+        (delete_namespaced_role, roles),
+    ):
+        for entity in entity_list:
+            if not entity.metadata.name.startswith("remote-run-"):
+                continue
+            entity_age = current_time - entity.metadata.creation_timestamp
+            if entity_age < age_limit:
+                continue
+            delete_func(kube_client, namespace, entity)
+
+
+def delete_namespaced_service_account(
+    kube_client: KubeClient, namespace: str, service_account: V1ServiceAccount
+):
+    kube_client.core.delete_namespaced_service_account(
+        service_account.metadata.name, namespace
+    )
+
+
+def delete_namespaced_role(kube_client: KubeClient, namespace: str, role: V1Role):
+    kube_client.rbac.delete_namespaced_role(role.metadata.name, namespace)