From 26cd034679037a370a2812dd150ca4e1cb5806d8 Mon Sep 17 00:00:00 2001 From: Mateo Gonzales Navarrete <38146507+mgonnav@users.noreply.github.com> Date: Thu, 30 Nov 2023 12:47:41 -0500 Subject: [PATCH] (fix)API: Correctly pass scheduled job proxy env vars when launching jobs (#237) --- estela-api/api/utils.py | 7 +++---- estela-api/api/views/job.py | 18 ++++++------------ estela-api/core/tasks.py | 25 +++++++++++++++++++++++-- 3 files changed, 32 insertions(+), 18 deletions(-) diff --git a/estela-api/api/utils.py b/estela-api/api/utils.py index f6749a95..28e70049 100644 --- a/estela-api/api/utils.py +++ b/estela-api/api/utils.py @@ -1,12 +1,12 @@ from datetime import timedelta -import redis from django.conf import settings +import redis from api import errors from api.exceptions import DataBaseError from config.job_manager import spiderdata_db_client -from core.models import SpiderJobEnvVar, ProxyProvider +from core.models import SpiderJobEnvVar def update_env_vars(instance, env_vars, level="project", delete=True): @@ -75,8 +75,7 @@ def delete_stats_from_redis(job): pass -def get_proxy_provider_envs(proxy_id): - proxy_provider = ProxyProvider.objects.get(pk=proxy_id) +def get_proxy_provider_envs(proxy_provider): proxy_attrs = [ "username", "password", diff --git a/estela-api/api/views/job.py b/estela-api/api/views/job.py index 8dd9fca5..3e9a9dc4 100644 --- a/estela-api/api/views/job.py +++ b/estela-api/api/views/job.py @@ -7,15 +7,15 @@ from rest_framework.response import Response from api.filters import SpiderJobFilter -from api.mixins import BaseViewSet, ActionHandlerMixin +from api.mixins import ActionHandlerMixin, BaseViewSet from api.serializers.job import ( SpiderJobCreateSerializer, SpiderJobSerializer, SpiderJobUpdateSerializer, ) -from api.utils import update_stats_from_redis, get_proxy_provider_envs +from api.utils import get_proxy_provider_envs, update_stats_from_redis from config.job_manager import job_manager -from core.models import DataStatus, Project, Spider, SpiderJob, ProxyProvider +from core.models import DataStatus, Project, ProxyProvider, Spider, SpiderJob class SpiderJobViewSet( @@ -121,17 +121,11 @@ def create(self, request, *args, **kwargs): env_var.name: env_var.value for env_var in job.env_vars.all() } - proxy_provider_names = [ - (proxy.name, proxy.proxyid) for proxy in ProxyProvider.objects.all() - ] proxy_name = job_env_vars.get("ESTELA_PROXY_NAME") - if proxy_name: - proxy_id = next( - (tup[1] for tup in proxy_provider_names if proxy_name in tup), None - ) - if proxy_id: - proxy_env_vars = get_proxy_provider_envs(proxy_id) + proxy_provider = ProxyProvider.objects.filter(name=proxy_name).first() + if proxy_provider: + proxy_env_vars = get_proxy_provider_envs(proxy_provider) job_env_vars.update( { env_var["name"]: env_var["value"] diff --git a/estela-api/core/tasks.py b/estela-api/core/tasks.py index c68e47ca..c5edef91 100644 --- a/estela-api/core/tasks.py +++ b/estela-api/core/tasks.py @@ -10,10 +10,21 @@ from rest_framework.authtoken.models import Token from api.serializers.job import SpiderJobCreateSerializer -from api.utils import delete_stats_from_redis, update_stats_from_redis +from api.utils import ( + delete_stats_from_redis, + get_proxy_provider_envs, + update_stats_from_redis, +) from config.celery import app as celery_app from config.job_manager import job_manager, spiderdata_db_client -from core.models import DataStatus, Project, Spider, SpiderJob, UsageRecord +from core.models import ( + DataStatus, + Project, + ProxyProvider, + Spider, + SpiderJob, + UsageRecord, +) def get_default_token(job): @@ -86,6 +97,16 @@ def launch_job(sid_, data_, data_expiry_days=None, token=None): job_args = {arg.name: arg.value for arg in job.args.all()} job_env_vars = {env_var.name: env_var.value for env_var in job.env_vars.all()} + + proxy_name = job_env_vars.get("ESTELA_PROXY_NAME") + if proxy_name: + proxy_provider = ProxyProvider.objects.filter(name=proxy_name).first() + if proxy_provider: + proxy_env_vars = get_proxy_provider_envs(proxy_provider) + job_env_vars.update( + {env_var["name"]: env_var["value"] for env_var in proxy_env_vars} + ) + job_manager.create_job( job.name, job.key,