Skip to content

Commit

Permalink
Merge branch 'staging' into prod
Browse files Browse the repository at this point in the history
  • Loading branch information
NotChristianGarcia committed Dec 4, 2024
2 parents 7d9fcfb + 7ed7ce3 commit 26ee27c
Show file tree
Hide file tree
Showing 11 changed files with 404 additions and 201 deletions.
1 change: 1 addition & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ kubernetes==28.1.0
neo4j-driver
rabbitpy
channelpy
python-multipart

# Misc
pylint
Expand Down
5 changes: 4 additions & 1 deletion service/api_pods_podid.py
Original file line number Diff line number Diff line change
Expand Up @@ -130,10 +130,13 @@ async def get_derived_pod(pod_id):
pods_env = Password.db_get_with_pk(pod.pod_id, pod.tenant_id, pod.site_id)
pods_env = pods_env.dict()
for key, val in final_pod.environment_variables.items():
new_val = val
if isinstance(val, str):
# regex to create list of [<<TAPIS_*>> strings, str of inner variable without >><<]
matches = re.findall(r'<<TAPIS_(.*?)>>', val)
for match in matches:
final_pod.environment_variables[key] = val.replace(f"<<TAPIS_{match}>>", pods_env.get(match))
new_val = new_val.replace(f"<<TAPIS_{match}>>", pods_env.get(match))
final_pod.environment_variables[key] = new_val


return ok(result=final_pod.display(), msg="Final derived pod retrieved successfully.")
319 changes: 197 additions & 122 deletions service/api_pods_podid_func.py

Large diffs are not rendered by default.

42 changes: 27 additions & 15 deletions service/health.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,33 +86,41 @@ def rm_volume(k8_name):

return volume_exists

def graceful_rm_pod(pod, log = None):
def graceful_rm_pod(pod, log=None):
"""
This is async. Commands run, but deletion takes some time.
Needs to delete pod, delete service, and change traefik to "offline" response.
TODO Set status to shutting down. Something else will put into "STOPPED".
"""
logger.info(f"Top of shutdown pod for pod: {pod.k8_name}")
# Change pod status to SHUTTING DOWN
pod.status = DELETING
pod.db_update(log)
logger.debug(f"spawner has updated pod status to DELETING")
try:
logger.info(f"Top of shutdown pod for pod: {pod.k8_name}")
# Change pod status to SHUTTING DOWN
pod.status = DELETING
pod.db_update(log)
logger.debug(f"spawner has updated pod status to DELETING")

return rm_pod(pod.k8_name)
return rm_pod(pod.k8_name)
except Exception as e:
logger.error(f"Failed to gracefully remove pod {pod.k8_name}")
raise

def graceful_rm_volume(volume):
"""
This is async. Commands run, but deletion takes some time.
Needs to delete volume, delete volume, and change traefik to "offline" response.
TODO Set status to shutting down. Something else will put into "STOPPED".
"""
logger.info(f"Top of shutdown volume for volume: {volume.k8_name}")
# Change pod status to SHUTTING DOWN
volume.status = DELETING
volume.db_update()
logger.debug(f"spawner has updated volume status to DELETING")
try:
logger.info(f"Top of shutdown volume for volume: {volume.k8_name}")
# Change volume status to SHUTTING DOWN
volume.status = DELETING
volume.db_update()
logger.debug(f"spawner has updated volume status to DELETING")

return rm_volume(volume.k8_name)
return rm_volume(volume.k8_name)
except Exception as e:
logger.error(f"Failed to gracefully remove volume {volume.k8_name}")
raise

def check_k8_pods(k8_pods):
"""
Expand Down Expand Up @@ -238,7 +246,11 @@ def check_k8_pods(k8_pods):
logs = get_k8_logs(k8_pod['k8_name'])
if pod.logs != logs:
pod.logs = logs
pod.db_update() # just adding logs, no action_logs needed.
logger.critical(f"UPDATING:: Before update with logs: {pod}")
try:
pod.db_update() # just adding logs, no action_logs needed.
except Exception as e:
logger.error(f"Error updating pod logs: {e}", exc_info=True)

def check_k8_services():
# This is all for only the site specified in conf.site_id.
Expand Down Expand Up @@ -385,7 +397,7 @@ def main():
logger.info("Successfully connected to dbs.")
break
except Exception as e:
logger.info(f"Can't connect to dbs yet idx: {idx}. e: {e.orig}") # args: {e.args} # add e.args for more detail
logger.info(f"Can't connect to dbs yet idx: {idx}. e: {e}") # args: {e.args} # add e.args for more detail
# Health seems to take a few seconds to come up (due to database creation and api creation)
# Increment and have a short wait
idx += 1
Expand Down
9 changes: 6 additions & 3 deletions service/health_central.py
Original file line number Diff line number Diff line change
Expand Up @@ -202,7 +202,10 @@ def set_traefik_proxy():
http_proxy_info = {}
postgres_proxy_info = {}
for input_pod in all_pods:
# logger.critical(f"TRAINING22-input_pod.tenant_id: {input_pod.tenant_id}, input_pod.site_id: {input_pod.site_id}")
pod = combine_pod_and_template_recursively(input_pod, input_pod.template, tenant=input_pod.tenant_id, site=input_pod.site_id)
# logger.critical(f"TRAINING22-pod: HERE?")
logger.critical(f"TRAINNNED-pod: {pod}")
# Each pod can have up to 3 networking objects with custom filled port/protocol/name
for net_name, net_info in pod.networking.items():
if not isinstance(net_info, dict):
Expand All @@ -227,9 +230,9 @@ def set_traefik_proxy():
pod_id = pod_id_section
logger.critical(f"pod_id: {pod_id}, tapis_domain: {tapis_domain}, net_info: {net_info}")
forward_auth_info = {
"tapis_auth": net_info.get('tapis_auth', True),
"tapis_auth": net_info.get('tapis_auth', False),
"auth_url": f"https://{tapis_domain}/v3/pods/{pod_id}/auth",
"tapis_auth_response_headers": net_info.get('tapis_auth_response_headers', []),
"tapis_auth_response_headers": net_info.get('tapis_auth_response_headers', {}),
}

match net_info['protocol']:
Expand All @@ -254,7 +257,7 @@ def main():
Main function for health checks.
"""
# Try and run check_db_pods. Will try for 60 seconds until health is declared "broken".
logger.info("Top of health. Checking if db's are initialized.")
logger.info("Top of health central. Checking if db's are initialized.")
idx = 0
while idx < 12:
try:
Expand Down
50 changes: 33 additions & 17 deletions service/kubernetes_templates.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,8 +65,21 @@ def combine_pod_and_template_recursively(input_obj, template_name, seen_template
else:
setattr(input_obj, mod_key, mod_val)

if input_obj.resources:
input_obj.resources = input_obj.resources.dict()
logger.debug(f"End of combine_pod_and_template_recursively for template: {template_name}, tenant: {tenant}, site: {site}")
try:
if input_obj.resources:
input_obj.resources = input_obj.resources.dict()
except Exception as e:
logger.debug(f'this resources part: Got exception when attempting to combine pod and templates: {e}')
pass

try:
if input_obj.networking:
input_obj.networking = input_obj.networking.dict()
except Exception as e:
logger.debug(f'this networking part: Got exception when attempting to combine pod and templates: {e}')
pass


except Exception as e:
logger.debug(f'Got exception when attempting to combine pod and templates: {e}')
Expand Down Expand Up @@ -117,25 +130,28 @@ def start_generic_pod(input_pod, revision: int):
pods_env = pods_env.dict()
if final_pod.environment_variables:
for key, val in final_pod.environment_variables.items():
new_val = val
if isinstance(val, str):
# regex to create list of [<<TAPIS_*>> strings, str of inner variable without >><<]
matches = re.findall(r'<<TAPIS_(.*?)>>', val)
for match in matches:
final_pod.environment_variables[key] = val.replace(f"<<TAPIS_{match}>>", pods_env.get(match))
#command
if final_pod.command:
for key in final_pod.command:
if isinstance(key, str):
matches = re.findall(r'<<TAPIS_(.*?)>>', key)
for match in matches:
final_pod.command[key] = key.replace(f"<<TAPIS_{match}>>", pods_env.get(match))
#arguments
if final_pod.arguments:
for key in final_pod.arguments:
if isinstance(key, str):
matches = re.findall(r'<<TAPIS_(.*?)>>', key)
for match in matches:
final_pod.arguments[key] = key.replace(f"<<TAPIS_{match}>>", pods_env.get(match))
new_val = new_val.replace(f"<<TAPIS_{match}>>", pods_env.get(match))
final_pod.environment_variables[key] = new_val

# #command
# if final_pod.command:
# for key in final_pod.command:
# if isinstance(key, str):
# matches = re.findall(r'<<TAPIS_(.*?)>>', key)
# for match in matches:
# final_pod.command[key] = key.replace(f"<<TAPIS_{match}>>", pods_env.get(match))
# #arguments
# if final_pod.arguments:
# for key in final_pod.arguments:
# if isinstance(key, str):
# matches = re.findall(r'<<TAPIS_(.*?)>>', key)
# for match in matches:
# final_pod.arguments[key] = key.replace(f"<<TAPIS_{match}>>", pods_env.get(match))

volumes = []
volume_mounts = []
Expand Down
1 change: 0 additions & 1 deletion service/models_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,6 @@ def get_site_tenant_session(obj={}, tenant: str = None, site: str = None):
site_id = site or getattr(obj, 'site_id', None) or 'tacc'
logger.info(f"Using site: {site_id}; tenant: {tenant_id}. Getting tenant pg obj.")
store = pg_store[site_id][tenant_id]
logger.debug(f"Using site: {site_id}; tenant: {tenant_id}; Session: {Session}.")
return site_id, tenant_id, store

def db_create(self, tenant: str = None, site: str = None):
Expand Down
Loading

0 comments on commit 26ee27c

Please sign in to comment.