From f8138f109958cd239cf9af19d2871b24c64a39d0 Mon Sep 17 00:00:00 2001 From: "Christian R. Garcia" Date: Mon, 18 Nov 2024 10:52:48 -0800 Subject: [PATCH] debug traefik timeouts for default entryPoint quotes? global tcp servers transport? another attempt this isn't in the docs? back to working simplify? simplify again attempt back down remove entryPoint so it's only set in static config via cli rename entryPoint to Traefik. remove entryPoint from dynamic config again explicit entryPoint reference for tcp routers Add entryPoint to tcp routers and not just http Update for auth derive tenant from host update fuller log update Better log Back to _tapis_pods Upped set cookie Updating update update update add response update debug change redirect update update update update update get_username update cookie setting update update last redirect? RedirectResponse status_code last redirect Update cookies debug debug update tag networking take into account template derivation import update update update update test1 test2 attempt logs update update Changes redirect back to user_pod? redirect debug update username username two? email required? maybe this? 
update need the @ wrong location debug Fix double @s --- service/api_pods_podid_func.py | 160 ++++++++++++++++++-------- service/health_central.py | 5 +- service/models_pods.py | 6 +- service/models_templates_tags.py | 76 +++++++++++- service/templates/traefik-template.j2 | 19 ++- 5 files changed, 202 insertions(+), 64 deletions(-) diff --git a/service/api_pods_podid_func.py b/service/api_pods_podid_func.py index 0a68738..7774a41 100644 --- a/service/api_pods_podid_func.py +++ b/service/api_pods_podid_func.py @@ -4,9 +4,11 @@ from models_misc import SetPermission from channels import CommandChannel from codes import OFF, ON, RESTART, REQUESTED, STOPPED +import requests from tapisservice.tapisfastapi.utils import g, ok from tapisservice.config import conf from __init__ import t, BadRequestError +from models_templates_tags import combine_pod_and_template_recursively from tapisservice.logs import get_logger @@ -266,18 +268,19 @@ def is_logged_in(cookies): If so: return True, username, roles Otherwise: return False, None, None """ - if 'username' in cookies: - return True, cookies['username'], cookies['roles'] + logger.debug(f"Checking if logged in: {cookies}") + if 'X-Tapis-Username' in cookies: + return True, cookies['X-Tapis-Username'], None return False, None, None -def get_username(token): +def get_username(tapis_domain, token): """ Validate a Tapis JWT, `token`, and resolve it to a username. """ headers = {'Content-Type': 'text/html'} # call the userinfo endpoint - url = f"{config['tapis_base_url']}/v3/oauth2/userinfo" + url = f"https://{tapis_domain}/v3/oauth2/userinfo" headers = {'X-Tapis-Token': token} try: rsp = requests.get(url, headers=headers) @@ -303,8 +306,8 @@ async def pod_auth(pod_id_net, request: Request): Process a callback from a Tapis authorization server: 1) Get the authorization code from the query parameters. 2) Exchange the code for a token - 3) Add the user and token to the sessionhttps - 4) Redirect to the /data endpoint. 
+ 3) Add the user and token to the session + 4) Redirect to the /data endpoint """ logger.debug(f"GET /pods/{pod_id_net}/auth - pod-auth, headers: {request.headers}, request.cookies: {request.cookies}") # In cases where networking key is not 'default', the pod_id_net is f"{pod_id}-{network_key}" @@ -333,19 +336,28 @@ async def pod_auth(pod_id_net, request: Request): # "X-Tapis-Token": xTapisToken # }, headers={ - "X-TapisUsername": username, + "X-Tapis-Username": username, "X-Tapis-Token": xTapisToken }) else: - authenticated, _, _ = is_logged_in(request.cookies) + authenticated, xTapisUsername, _ = is_logged_in(request.cookies) # if already authenticated, return 200, which will allow the request to continue in Traefik if authenticated: - return {'code': 200} #result = {'path':'/', 'code': 302} + username = xTapisUsername + "@tapis.io" + logger.debug(f"GET /pods/{pod_id_net}/auth - pod-auth, already authenticated X-Tapis-Username: {username}") + #return {'code': 200} #result = {'path':'/', 'code': 302} + return JSONResponse(content=ok("Already authenticated"), status_code=200, headers={"X-Tapis-Username": username}) # if not authenticated, start the OAuth flow pod = Pod.db_get_with_pk(pod_id, tenant=g.request_tenant_id, site=g.site_id) + if pod.template: + # Derive the final pod object by combining the pod and templates + final_pod = combine_pod_and_template_recursively(pod, pod.template, tenant=g.request_tenant_id, site=g.site_id) + else: + final_pod = pod + net_info = pod.networking.get(network_key, None) if not net_info: raise Exception(f"Pod {pod_id} does not have networking key that matches pod_id_net: {pod_id_net}") @@ -353,72 +365,71 @@ async def pod_auth(pod_id_net, request: Request): # Get info for clients # The goal is: https://tacc.develop.tapis.io/v3/pods/{{pod_id}}/auth - pod_id, tapis_domain = net_info['url'].split('.pods.') ## Should return `mypod` & `tacc.tapis.io` with proper tenant and schmu - if not net_info.get('tapis_auth', False): + pod_id, 
tapis_domain = net_info.url.split('.pods.') ## Should return `mypod` & `tacc.tapis.io` with proper tenant and schmu + tapis_tenant = tapis_domain.split('.')[0] + if not net_info.tapis_auth: return JSONResponse(content = f"This pod does not have tapis_auth configured in networking for this pod_id_net: {pod_id_net}. Leave or remedy.", status_code = 403) auth_url = f"https://{tapis_domain}/v3/pods/{pod_id_net}/auth" auth_callback_url = f"https://{tapis_domain}/v3/pods/{pod_id_net}/auth/callback" # should match client callback_url - tapis_auth_response_headers = net_info.get('tapis_auth_response_headers', []) + tapis_auth_response_headers = net_info.tapis_auth_response_headers client_id = f"PODS-SERVICE-{pod.k8_name}-{network_key}" #client_key = "4STQ^t&RGa$sah!SZ9zCP9UScGoEkS^GYLZDjjtjPBipp4kVLyrr@X" client_display_name = f"Tapis Pods Service Pod: {pod_id}" client_description = f"Tapis Pods Service Pod: {pod_id}" - oauth2_url = f"https://{tapis_domain}/v3/oauth2/authorize?client_id={client_id}&redirect_uri={auth_callback_url}&response_type=code" - + + logger.debug(f"GET /pods/{pod_id_net}/auth - pod-auth, headers: {request.headers}, request.cookies: {request.cookies}, tenant_id: {g.request_tenant_id}, derived_tenant_id: {tapis_tenant}, site_id: {g.site_id}") + + td = None # Create tapis client or update tapis client if needed try: + logger.debug(f"Creating client_id: {client_id}, tenant: {tapis_tenant}") res, td = t.authenticator.create_client( client_id = client_id, #client_key = client_key, callback_url = auth_callback_url, display_name = client_display_name, description = client_description, - _x_tapis_tenant = g.request_tenant_id, - _x_tapis_user = "pods", + _x_tapis_tenant = tapis_tenant, + _x_tapis_user = "_tapis_pods", _tapis_debug = True ) except BadRequestError as e: # Exceptions in 3 shouldn't have e.message (only e.args), but this one does. 
logger.debug(f"Got error creating client: {e.message}") if "This change would violate uniqueness constraints" in e.message: - logger.debug(f"Client already exists, updating client_id: {client_id}") + logger.debug(f"Client already exists, updating client_id: {client_id}, tenant: {tapis_tenant}") try: res, td = t.authenticator.update_client( client_id = client_id, callback_url = auth_callback_url, display_name = client_display_name, description = client_description, - _x_tapis_tenant = g.request_tenant_id, - _x_tapis_user = "pods", + _x_tapis_tenant = tapis_tenant, + _x_tapis_user = "_tapis_pods", _tapis_debug = True ) # Assuming you want to return a success response after updating - success_msg = f"Client {client_id} updated successfully. oauth2_url is: {oauth2_url}" + success_msg = f"Client {client_id} updated successfully." logger.info(success_msg) - #return JSONResponse(content = success_msg, status_code = 200) - return RedirectResponse(url=oauth2_url, status_code=302) except Exception as e: msg = (f"Error updating client_id: {client_id}. e: {e.args}, e: {e}, dir(e): {dir(e)}") logger.warning(msg) return JSONResponse(content = msg, status_code = 500) - msg = (f"Error creating client_id: {client_id}. e: {e.args}, e: {e.message}, dir(e): {dir(e)}") + msg = (f"Error creating client_id: {client_id}. 
e.message: {e.message}, e.request: {e.request}, e.response: {e.response}, tapis_debug = {td}") logger.warning(msg) - return JSONResponse(content = msg, status_code = 500) - - - - result = {'path': auth_callback_url, 'code': 302} - return RedirectResponse(url=auth_callback_url, status_code=200) +# return JSONResponse(content = msg, status_code = 500) + oauth2_url = f"https://{tapis_domain}/v3/oauth2/authorize?client_id={client_id}&redirect_uri={auth_callback_url}&response_type=code" + logger.debug(f"oauth2 url is: {oauth2_url}") + return RedirectResponse(url=oauth2_url, status_code=302) + # result = {'path': auth_callback_url, 'code': 302} return JSONResponse(content = str(result)) # Shouldn't be able to get here raise Exception(f"not implemented") - return ok("I promise I'm healthy.") - @router.get( "/pods/{pod_id_net}/auth/callback", @@ -433,38 +444,91 @@ def callback(pod_id_net, request: Request): network_key = parts[1] if len(parts) > 1 else 'default' pod = Pod.db_get_with_pk(pod_id, tenant=g.request_tenant_id, site=g.site_id) + if pod.template: + # Derive the final pod object by combining the pod and templates + final_pod = combine_pod_and_template_recursively(pod, pod.template, tenant=g.request_tenant_id, site=g.site_id) + else: + final_pod = pod + net_info = pod.networking.get(network_key, None) if not net_info: raise Exception(f"Pod {pod_id} does not have networking key that matches pod_id_net: {pod_id_net}") - pod_id, tapis_domain = net_info['url'].split('.pods.') ## Should return `mypod` & `tacc.tapis.io` with proper tenant and schmu + pod_id, tapis_domain = net_info.url.split('.pods.') ## Should return `mypod` & `tacc.tapis.io` with proper tenant and schmu + tapis_tenant = tapis_domain.split('.')[0] + if not net_info.tapis_auth: + return JSONResponse(content = f"This pod does not have tapis_auth configured in networking for this pod_id_net: {pod_id_net}. 
Leave or remedy.", status_code = 403) + + client_id = f"PODS-SERVICE-{pod.k8_name}-{network_key}" - return JSONResponse(content = f"Callback for pod_id_net: {pod_id_net}, tapis_domain: {tapis_domain}", status_code = 200) - # return JSONResponse(content = str(dir(request))) - # code = request.args.get('code') - # if not code: - # raise Exception(f"Error: No code in request; debug: {request.args}") + try: + res, td = t.authenticator.get_client( + client_id = client_id, + _x_tapis_tenant = tapis_tenant, + _x_tapis_user = "_tapis_pods", + _tapis_debug = True) + except Exception as e: + return JSONResponse(content=f"Error retrieving client: {e}", status_code=500) + + # return JSONResponse(content = f"Callback for pod_id_net: {pod_id_net}, tapis_domain: {tapis_domain}", status_code = 200) + code = request.query_params.get('code') + if not code: + raise Exception(f"Error: No code in request; debug: {request.query_params}") + logger.debug(f"GET /pods/{pod_id_net}/auth/callback - pod_auth_callback, code: {code}") url = f"https://{tapis_domain}/v3/oauth2/tokens" data = { - "code": "code", - "redirect_uri": f"https://{tapis_domain}/v3/oauth2/callback", + "code": code, + "redirect_uri": f"https://{tapis_domain}/v3/pods/{pod_id_net}/auth/callback", "grant_type": "authorization_code", } + try: - response = requests.post(url, data=data, auth=(config['client_id'], config['client_key'])) + logger.debug(dir(res)) + response = requests.post(url, data=data, auth=(client_id, res.client_key)) response.raise_for_status() - json_resp = json.loads(response.text) + logger.debug(f"GET /pods/{pod_id_net}/auth/callback callback request response: {response.text}") + json_resp = response.json() + #json_resp = json.loads(response.text) token = json_resp['result']['access_token']['access_token'] except Exception as e: raise Exception(f"Error generating Tapis token; debug: {e}") - username = auth.get_username(token) - - response = make_response(redirect(os.environ['FRONT_URL'], code=302)) + try: + 
logger.debug(f"GET /pods/{pod_id_net}/auth/callback - pod_auth_callback, token: {token}") + + username = get_username(tapis_domain=tapis_domain, token=token) + + username = username + content = {"message": f"Callback for pod_id_net: {pod_id_net}, tapis_domain: {tapis_domain}, username: {username}, token: {token}"} + +# response = JSONResponse(content=content, status_code=200) +# response = RedirectResponse(url=f"https://{tapis_domain}/v3/pods/{pod_id_net}/auth", status_code=302, headers={"X-Tapis-Username": username, "X-Tapis-Token": token}) + #response = JSONResponse(content=content, status_code=200) + response = RedirectResponse(url=f"https://{net_info.url}/auth", status_code=302) + + domain = conf.get('COOKIE_DOMAIN', f"{tapis_domain}") + logger.debug(f"About to set cookies. domain: {domain}, net_info.url: {net_info.url}") - domain = os.environ.get('COOKIE_DOMAIN', ".pods.icicle.tapis.io") - response.set_cookie("token", token, domain=domain, secure=True) - response.set_cookie("username", username, domain=domain, secure=True) + response.set_cookie("X-Tapis-Token", token, domain=net_info.url, secure=True) + response.set_cookie("X-Tapis-Username", username, domain=net_info.url, secure=True) + + response.set_cookie("X-Tapis-Token", token, domain=domain, secure=True) + response.set_cookie("X-Tapis-Username", username, domain=domain, secure=True) + + logger.debug(f"GET /pods/{pod_id_net}/auth/callback - pod_auth_callback last bit, response: {response}, net_info: {net_info.url}") + + return response + except Exception as e: + raise Exception(f"Error setting cookies; debug: {e}") + + # response = make_response(redirect(os.environ['FRONT_URL'], code=302)) + + # domain = conf.get('COOKIE_DOMAIN', f".pods.{tapis_domain}") + # response.set_cookie("token", token, domain=domain, secure=True) + # response.set_cookie("username", username, domain=domain, secure=True) - return response + # return response + + #return JSONResponse(content = f"Callback for pod_id_net: {pod_id_net}, 
tapis_domain: {tapis_domain}, username: {username}, token: {token}", status_code = 200) + #return response diff --git a/service/health_central.py b/service/health_central.py index 1a30fcd..92abfbb 100644 --- a/service/health_central.py +++ b/service/health_central.py @@ -202,7 +202,10 @@ def set_traefik_proxy(): http_proxy_info = {} postgres_proxy_info = {} for input_pod in all_pods: +# logger.critical(f"TRAINING22-input_pod.tenant_id: {input_pod.tenant_id}, input_pod.site_id: {input_pod.site_id}") pod = combine_pod_and_template_recursively(input_pod, input_pod.template, tenant=input_pod.tenant_id, site=input_pod.site_id) +# logger.critical(f"TRAINING22-pod: HERE?") + logger.critical(f"TRAINNNED-pod: {pod}") # Each pod can have up to 3 networking objects with custom filled port/protocol/name for net_name, net_info in pod.networking.items(): if not isinstance(net_info, dict): @@ -254,7 +257,7 @@ def main(): Main function for health checks. """ # Try and run check_db_pods. Will try for 60 seconds until health is declared "broken". - logger.info("Top of health. Checking if db's are initialized.") + logger.info("Top of health central. 
Checking if db's are initialized.") idx = 0 while idx < 12: try: diff --git a/service/models_pods.py b/service/models_pods.py index 0b6335e..69d5d9b 100644 --- a/service/models_pods.py +++ b/service/models_pods.py @@ -31,7 +31,7 @@ from models_snapshots import Snapshot def get_queue_by_name(compute_queues, queue_name): - logger.debug("top of kubernetes_utils.deduct_queue_settings().") + logger.debug("top of models_pods.deduct_queue_settings().") for queue in compute_queues: if queue['queue_name'] == queue_name: @@ -421,7 +421,7 @@ def check_template(cls, values): tenant_id = values.get('tenant_id') site_id = values.get('site_id') - if template is not "" and tenant_id is not None and site_id is not None: + if template is not "" and tenant_id is not None and tenant_id is not "" and site_id is not None and site_id is not "": logger.debug(f"top of PodBaseFull.check_template() with template: {template}, tenant_id: {tenant_id}, site_id: {site_id}") template_name_str, template, template_tag = derive_template_info(template, tenant_id, site_id) values['template'] = template_name_str @@ -436,7 +436,7 @@ def check_image(cls, values): logger.debug(f"top of PodBaseFull.check_image() with image: {image}, template: {template}, tenant_id: {tenant_id}, site_id: {site_id}") ## Wait to make sure enough validation has happened for both to be initially set. - if image is not "" and template is not "" and tenant_id is not None and site_id is not None: + if image is not "" and template is not "" and tenant_id is not None and tenant_id is not "" and site_id is not None and site_id is not "": if image: # priority to template.image, so if it's set, it's top pass diff --git a/service/models_templates_tags.py b/service/models_templates_tags.py index 96494dd..7945c87 100644 --- a/service/models_templates_tags.py +++ b/service/models_templates_tags.py @@ -101,6 +101,13 @@ class Networking(TapisModel): protocol: str = Field("http", description = "Which network protocol to use. 
`http`, `tcp`, `postgres`, or `local_only`. `local_only` is only accessible from within the cluster.") port: int = Field(5000, description = "Pod port to expose via networking.url in this networking object.") url: str = Field("", description = "URL used to access the port of the pod defined in this networking object. Generated by service.") + ip_allow_list: list[str] = Field([], description = "List of IPs that are allowed to access this specific pod port. If empty, all IPs are allowed. ex. ['127.0.0.1/32', '192.168.1.7']") + tapis_auth: bool = Field(False, description = "If true, will require Tapis auth to access the pod.") + tapis_auth_response_headers: list[str] = Field([], description = "List of headers to forward to the pod when using Tapis auth.") + tapis_auth_allowed_users: list[str] = Field(["*"], description = "List of users allowed to access the pod when using Tapis auth.") + tapis_ui_uri: str = Field("", description = "Path to redirect to when accessing the pod via Tapis UI.") + tapis_ui_uri_redirect: bool = Field(False, description = "If true, will redirect to the tapis_ui_uri when accessing the pod via Tapis UI. Otherwise, just read-only uri.") + tapis_ui_uri_description: str = Field("", description = "Describing where the tapis_ui_uri will redirect to.") @validator('protocol') def check_protocol(cls, v): @@ -128,6 +135,59 @@ def check_url(cls, v): raise ValueError(f"networking.url length must be below 128 characters. Inputted length: {len(v)}") return v + @validator('tapis_auth_response_headers') + def check_tapis_auth_forward_cookies(cls, v): + if v: + if not isinstance(v, list): + raise TypeError(f"tapis_auth_response_headers must be list. Got '{type(v).__name__}'.") + for header in v: + if not isinstance(header, str): + raise TypeError(f"tapis_auth_response_headers must be list of str. 
Got '{type(header).__name__}'.") + return v + + @validator('tapis_auth_allowed_users') + def check_tapis_auth_allowed_users(cls, v): + if v: + if not isinstance(v, list): + raise TypeError(f"tapis_auth_allowed_users must be list. Got '{type(v).__name__}'.") + for user in v: + if not isinstance(user, str): + raise TypeError(f"tapis_auth_allowed_users must be list of str. Got '{type(user).__name__}'.") + return v + + @validator('tapis_ui_uri') + def check_tapis_ui_uri(cls, v): + if v: + # Regex match to ensure url is safe with only [A-z0-9.-/] chars. + res = re.fullmatch(r'[a-z][a-z0-9.-/]+', v) + if not res: + raise ValueError(f"networking.tapis_ui_uri can only contain lowercase alphanumeric characters, periods, forward-slash, and hyphens.") + # pod_id char limit = 64 + if len(v) > 128: + raise ValueError(f"networking.tapis_ui_uri length must be below 128 characters. Inputted length: {len(v)}") + return v + + @validator('tapis_ui_uri_description') + def check_tapis_ui_uri_description(cls, v): + # ensure tapis_ui_uri_description is all ascii + if not v.isascii(): + raise ValueError(f"tapis_ui_uri_description field may only contain ASCII characters.") + # make sure tapis_ui_uri_description < 255 characters + if len(v) > 255: + raise ValueError(f"tapis_ui_uri_description field must be less than 255 characters. 
Inputted length: {len(v)}") + return v + + @root_validator(pre=False) + def check_tapis_auth_fields(cls, values): + protocol = values.get('protocol') + tapis_auth = values.get('tapis_auth') + + if tapis_auth and protocol != "http": + raise ValueError(f"tapis_auth can only be used with protocol 'http'.") + + return values + + class Resources(TapisModel): # CPU/Mem defaults are set in configschema.json @@ -431,8 +491,20 @@ def combine_pod_and_template_recursively(input_obj, template_name, seen_template else: setattr(input_obj, mod_key, mod_val) - if input_obj.resources: - input_obj.resources = input_obj.resources.dict() + logger.debug(f"End of combine_pod_and_template_recursively for template: {template_name}, tenant: {tenant}, site: {site}") + try: + if input_obj.resources: + input_obj.resources = input_obj.resources.dict() + except Exception as e: + logger.debug(f'this resources part: Got exception when attempting to combine pod and templates: {e}') + pass + + # try: + # if input_obj.networking: + # input_obj.networking = input_obj.networking.dict() + # except Exception as e: + # logger.debug(f'this networking part: Got exception when attempting to combine pod and templates: {e}') + # pass except Exception as e: logger.debug(f'Got exception when attempting to combine pod and templates: {e}') diff --git a/service/templates/traefik-template.j2 b/service/templates/traefik-template.j2 index b23d7ed..c9660c0 100644 --- a/service/templates/traefik-template.j2 +++ b/service/templates/traefik-template.j2 @@ -1,12 +1,3 @@ -entryPoints: - web: - address: ":80" - transport: - respondingTimeouts: - readTimeout: 10 - dash: - address: ":8080" - http: middlewares: path-strip-v3: @@ -27,7 +18,7 @@ http: service: pods-service query: "/error-handler/{status}" - {% for pname, pdata in http_proxy_info.items() -%} + {% for pname, pdata in http_proxy_info.items() -%} {% if pdata.tapis_auth -%} tapis-auth-{{ pname }}: forwardAuth: @@ -59,6 +50,8 @@ http: {% for pname, pdata in 
http_proxy_info.items() -%} {{ pname }}: + entryPoints: + - web rule: "Host(`{{ pdata.url }}`)" {% if pdata.tapis_auth -%} middlewares: @@ -82,11 +75,17 @@ http: - url: http://{{ pdata.k8_service }}:{{ pdata.routing_port }} {% endfor %} +tcpServersTransport: + dialTimeout: 180s + dialKeepAlive: 180s + {% if tcp_proxy_info or postgres_proxy_info -%} tcp: routers:{% endif %} {% for pname, pdata in tcp_proxy_info.items() -%} {{ pname }}: + entryPoints: + - web rule: "HostSNI(`{{ pdata.url }}`)" service: "{{ pname }}" tls: