Merge branch 'master' into mayday
arunmathaisk authored Jan 2, 2025
2 parents a530d32 + 65b8ad7 commit 3ff450b
Showing 8 changed files with 134 additions and 43 deletions.
6 changes: 4 additions & 2 deletions .cspell.json
@@ -13,7 +13,6 @@
"erpnext",
"parenttype",
"parentfield",
"subsription",
"sbool",
"binlog",
"ifnull",
@@ -68,6 +67,9 @@
"nvme",
"nofail"

"hset",
"serializability",
"oom"
],
"allowCompoundWords": true,
"ignorePaths": [
@@ -82,4 +84,4 @@
"requirements.txt",
"press/utils/country_timezone.py"
]
}
}
50 changes: 31 additions & 19 deletions press/api/monitoring.py
@@ -1,4 +1,3 @@
# -*- coding: utf-8 -*-
# Copyright (c) 2021, Frappe and contributors
# For license information, please see license.txt

@@ -7,15 +6,11 @@

import frappe

from press.exceptions import AlertRuleNotEnabled
from press.utils import log_error


@frappe.whitelist(allow_guest=True)
def targets(token):
monitor_token = frappe.db.get_single_value("Press Settings", "monitor_token")
if token != monitor_token:
return

def get_benches():
self_hosted_stand_alone_servers = frappe.get_all(
"Server",
{"is_standalone": True, "is_self_hosted": True, "status": "Active"},
@@ -39,18 +34,18 @@ def targets(token):
)
}
benches = []
for bench_name, sites in groupby(sites, lambda x: x.bench):
for bench_name, _sites in groupby(sites, lambda x: x.bench):
bench = bench_map[bench_name]
bench.update({"sites": [site.name for site in sites]})
bench.update({"sites": [site.name for site in _sites]})
benches.append(bench)

return benches


def get_clusters():
servers = {}
servers["proxy"] = frappe.get_all(
"Proxy Server", {"status": ("!=", "Archived")}, ["name", "cluster"]
)
servers["app"] = frappe.get_all(
"Server", {"status": ("!=", "Archived")}, ["name", "cluster"]
)
servers["proxy"] = frappe.get_all("Proxy Server", {"status": ("!=", "Archived")}, ["name", "cluster"])
servers["app"] = frappe.get_all("Server", {"status": ("!=", "Archived")}, ["name", "cluster"])
servers["database"] = frappe.get_all(
"Database Server", {"status": ("!=", "Archived")}, ["name", "cluster"]
)
@@ -69,10 +64,16 @@ def targets(token):
for job in job_map[server_type]:
cluster["jobs"].setdefault(job, []).append(server.name)

domains = frappe.get_all(
return clusters


def get_domains():
return frappe.get_all(
"Site Domain", ["name", "site"], {"tls_certificate": ("is", "set")}, order_by="name"
)


def get_tls():
tls = []
server_types = [
"Server",
@@ -87,13 +88,22 @@ def targets(token):
for server_type in server_types:
tls += frappe.get_all(server_type, {"status": ("!=", "Archived")}, ["name"])

return {"benches": benches, "clusters": clusters, "domains": domains, "tls": tls}
return tls


@frappe.whitelist(allow_guest=True)
def targets(token):
monitor_token = frappe.db.get_single_value("Press Settings", "monitor_token")
if token != monitor_token:
return None

return {"benches": get_benches(), "clusters": get_clusters(), "domains": get_domains(), "tls": get_tls()}


@frappe.whitelist(allow_guest=True, xss_safe=True)
def alert(*args, **kwargs):
try:
user = frappe.session.user
user = str(frappe.session.user)
frappe.set_user("Administrator")

doc = frappe.get_doc(
@@ -103,8 +113,10 @@ def alert(*args, **kwargs):
}
)
doc.insert()
except AlertRuleNotEnabled:
pass
except Exception:
log_error("Alertmanager Webhook Error", args=args, kwargs=kwargs)
raise Exception
raise
finally:
frappe.set_user(user)
4 changes: 4 additions & 0 deletions press/exceptions.py
@@ -39,3 +39,7 @@ class ConflictingCAARecord(ValidationError):

class TeamHeaderNotInRequestError(ValidationError):
pass


class AlertRuleNotEnabled(ValidationError):
pass
4 changes: 4 additions & 0 deletions press/press/doctype/agent_job/agent_job.py
@@ -343,6 +343,10 @@ def failed_because_of_agent_update(self) -> bool:
return True
return False

@property
def on_public_server(self):
return bool(frappe.db.get_value(self.server_type, self.server, "public"))


def job_detail(job):
job = frappe.get_doc("Agent Job", job)
67 changes: 60 additions & 7 deletions press/press/doctype/agent_job/agent_job_notifications.py
@@ -57,13 +57,19 @@ class JobErr(Enum):
ROW_SIZE_TOO_LARGE = auto()
DATA_TRUNCATED_FOR_COLUMN = auto()
BROKEN_PIPE_ERR = auto()
CANT_CONNECT_TO_MYSQL = auto()
GZIP_TAR_ERR = auto()
UNKNOWN_COMMAND_HYPHEN = auto()


DOC_URLS = {
JobErr.OOM: "https://frappecloud.com/docs/common-issues/oom-issues",
JobErr.ROW_SIZE_TOO_LARGE: "https://frappecloud.com/docs/faq/site#row-size-too-large-error-on-migrate",
JobErr.DATA_TRUNCATED_FOR_COLUMN: "https://frappecloud.com/docs/faq/site#data-truncated-for-column",
JobErr.BROKEN_PIPE_ERR: None,
JobErr.CANT_CONNECT_TO_MYSQL: "https://frappecloud.com/docs/cant-connect-to-mysql-server",
JobErr.GZIP_TAR_ERR: "https://frappecloud.com/docs/sites/migrate-an-existing-site#tar-gzip-command-fails-with-unexpected-eof",
JobErr.UNKNOWN_COMMAND_HYPHEN: "https://frappecloud.com/docs/unknown-command-",
}


@@ -88,11 +94,15 @@ def handlers() -> list[UserAddressableHandlerTuple]:
Due to this, the order of the tuples matters.
"""
return [
("returned non-zero exit status 137", update_with_oom_error),
("returned non-zero exit status 143", update_with_oom_error),
("Row size too large", update_with_row_size_too_large_error),
("Data truncated for column", update_with_data_truncated_for_column_error),
("returned non-zero exit status 137", update_with_oom_err),
("returned non-zero exit status 143", update_with_oom_err),
("Row size too large", update_with_row_size_too_large_err),
("Data truncated for column", update_with_data_truncated_for_column_err),
("BrokenPipeError", update_with_broken_pipe_err),
("ERROR 2002 (HY000)", update_with_cant_connect_to_mysql_err),
("gzip: stdin: unexpected end of file", update_with_gzip_tar_err),
("tar: Unexpected EOF in archive", update_with_gzip_tar_err),
("Unknown command '\\-'.", update_with_unknown_command_hyphen_err),
]


@@ -165,7 +175,7 @@ def get_details(job: AgentJob, title: str, message: str) -> Details:
return details


def update_with_oom_error(
def update_with_oom_err(
details: Details,
job: AgentJob,
):
@@ -192,7 +202,7 @@ def update_with_oom_error(
return False


def update_with_row_size_too_large_error(details: Details, job: AgentJob):
def update_with_row_size_too_large_err(details: Details, job: AgentJob):
details["title"] = "Row size too large error"

details[
@@ -207,7 +217,7 @@ def update_with_row_size_too_large_error(details: Details, job: AgentJob):
return True


def update_with_data_truncated_for_column_error(details: Details, job: AgentJob):
def update_with_data_truncated_for_column_err(details: Details, job: AgentJob):
details["title"] = "Data truncated for column error"

details[
@@ -237,6 +247,49 @@ def update_with_broken_pipe_err(details: Details, job: AgentJob):
return True


def update_with_cant_connect_to_mysql_err(details: Details, job: AgentJob):
details["title"] = "Can't connect to MySQL server"

suggestion = "To rectify this issue, please follow the steps mentioned in <i>Help</i>."
if job.on_public_server:
suggestion = "Please raise a support ticket if the issue persists."

details[
"message"
] = f"""<p>The server couldn't connect to MySQL server during the job. This likely happened as the mysql server restarted as it didn't have sufficient memory for the operation</p>
<p>{suggestion}</p>
"""

details["assistance_url"] = DOC_URLS[JobErr.CANT_CONNECT_TO_MYSQL]

return True


def update_with_gzip_tar_err(details: Details, job: AgentJob):
details["title"] = "Corrupt backup file"

details["message"] = f"""<p>An error occurred when extracting the backup to {job.site}.</p>
<p>To rectify this issue, please follow the steps mentioned in <i>Help</i>.</p>
"""

details["assistance_url"] = DOC_URLS[JobErr.GZIP_TAR_ERR]

return True


def update_with_unknown_command_hyphen_err(details: Details, job: AgentJob):
details["title"] = "Incompatible site backup"

details["message"] = f"""<p>An error occurred when extracting the backup to {job.site}.</p>
<p>This happens when the backup is taken from a later version of MariaDB and restored on an older version.</p>
<p>To rectify this issue, please follow the steps mentioned in <i>Help</i>.</p>
"""

details["assistance_url"] = DOC_URLS[JobErr.UNKNOWN_COMMAND_HYPHEN]

return True


def get_default_title(job: AgentJob) -> str:
if job.job_type == "Update Site Migrate":
return "Site Migrate"
5 changes: 4 additions & 1 deletion press/press/doctype/alertmanager_webhook_log/alertmanager_webhook_log.py
@@ -12,6 +12,7 @@
from frappe.utils.background_jobs import enqueue_doc
from frappe.utils.data import add_to_date

from press.exceptions import AlertRuleNotEnabled
from press.press.doctype.incident.incident import INCIDENT_ALERT, INCIDENT_SCOPE
from press.press.doctype.telegram_message.telegram_message import TelegramMessage
from press.utils import log_error
@@ -78,7 +79,9 @@ def clear_old_logs(days=10):

def validate(self):
self.parsed = json.loads(self.payload)
self.alert = self.parsed["groupLabels"]["alertname"]
self.alert = self.parsed["groupLabels"].get("alertname")
if not self.alert:
raise AlertRuleNotEnabled("No alertname found in groupLabels")
self.status = self.parsed["status"].capitalize()
self.severity = self.parsed["commonLabels"]["severity"].capitalize()
self.group_key = self.parsed["groupKey"]
40 changes: 26 additions & 14 deletions press/press/doctype/site/site.py
@@ -594,8 +594,8 @@ def rename(self, new_name: str):
# update the subscription config while renaming the standby site
self.update_config_preview()
site_config = json.loads(self.config)
subsription_config = site_config.get("subscription", {})
job = agent.rename_site(self, new_name, create_user, config={"subscription": subsription_config})
subscription_config = site_config.get("subscription", {})
job = agent.rename_site(self, new_name, create_user, config={"subscription": subscription_config})
self.flags.rename_site_agent_job_name = job.name
else:
agent.rename_site(self, new_name)
@@ -2320,7 +2320,6 @@ def get_database_performance_report(self):
from press.press.report.mariadb_slow_queries.mariadb_slow_queries import get_data as get_slow_queries

agent = Agent(self.server)
result = agent.get_summarized_performance_report_of_database(self)
# fetch slow queries of last 7 days
slow_queries = get_slow_queries(
frappe._dict(
@@ -2334,27 +2333,40 @@
}
)
)
# remove `parent` & `creation` indexes from unused_indexes
result["unused_indexes"] = [
index
for index in result.get("unused_indexes", [])
if index["index_name"] not in ["parent", "creation"]
]

# convert all the float to int
for query in slow_queries:
for key, value in query.items():
if isinstance(value, float):
query[key] = int(value)
# sort the slow queries by `rows_examined`
result["slow_queries"] = sorted(slow_queries, key=lambda x: x["rows_examined"], reverse=True)
result["is_performance_schema_enabled"] = False
is_performance_schema_enabled = False
if database_server := frappe.db.get_value("Server", self.server, "database_server"):
result["is_performance_schema_enabled"] = frappe.db.get_value(
is_performance_schema_enabled = frappe.db.get_value(
"Database Server",
database_server,
"is_performance_schema_enabled",
)
result = None
if is_performance_schema_enabled:
with suppress(Exception):
# for larger tables, or if the database holds any locks, fetching the performance report may fail
result = agent.get_summarized_performance_report_of_database(self)
# remove `parent` & `creation` indexes from unused_indexes
result["unused_indexes"] = [
index
for index in result.get("unused_indexes", [])
if index["index_name"] not in ["parent", "creation"]
]

if not result:
result = {}
result["unused_indexes"] = []
result["redundant_indexes"] = []
result["top_10_time_consuming_queries"] = []
result["top_10_queries_with_full_table_scan"] = []

# sort the slow queries by `rows_examined`
result["slow_queries"] = sorted(slow_queries, key=lambda x: x["rows_examined"], reverse=True)
result["is_performance_schema_enabled"] = is_performance_schema_enabled
return result

@property
1 change: 1 addition & 0 deletions press/press/doctype/usage_record/usage_record.py
@@ -111,6 +111,7 @@ def link_unlinked_usage_records():
"invoice": ("is", "not set"),
"date": ("between", (fd, ld)),
"team": ("not in", free_teams),
"docstatus": 1,
},
pluck="name",
ignore_ifnull=True,
