From 9659e4c657b1d6c588245c2fcafe6fdf6f69595e Mon Sep 17 00:00:00 2001 From: Robert Cowham Date: Fri, 23 Aug 2024 14:09:37 +1000 Subject: [PATCH] New metric p4_cmds_paused_cumulative and new field for log2sql --- cmd/log2sql/main.go | 11 ++++---- metrics/metrics.go | 5 ++++ metrics/metrics_test.go | 56 ++++++++++++++++++++++++++++++++++++++++- 3 files changed, 66 insertions(+), 6 deletions(-) diff --git a/cmd/log2sql/main.go b/cmd/log2sql/main.go index 9392513..a1da304 100644 --- a/cmd/log2sql/main.go +++ b/cmd/log2sql/main.go @@ -38,6 +38,7 @@ func writeHeader(f io.Writer) { pid INT NOT NULL, -- Process ID startTime DATETIME NOT NULL, endTime DATETIME NULL, -- Start/end time of command computedLapse FLOAT NULL, completedLapse FLOAT NULL, -- Lapse time for compute phase and total command (secs) + paused FLOAT NULL, -- Amount of time command paused (secs) user TEXT NOT NULL, workspace TEXT NOT NULL, ip TEXT NOT NULL, -- user/workspace name/IP app TEXT NOT NULL, -- p4api application reported, e.g. p4/p4v etc cmd TEXT NOT NULL, -- command executed, e.g. user-sync @@ -130,7 +131,7 @@ func dateStr(t time.Time) string { func getProcessStatement() string { return `INSERT INTO process (processkey, lineNumber, pid, - startTime ,endTime, computedLapse, completedLapse, + startTime ,endTime, computedLapse, completedLapse, paused, user, workspace, ip, app, cmd, args, uCpu, sCpu, diskIn, diskOut, ipcIn, ipcOut, maxRss, pageFaults, memMB, memPeakMB, rpcMsgsIn, rpcMsgsOut, @@ -153,7 +154,7 @@ func getProcessStatement() string { lbrUncompressWrites, lbrUncompressWriteBytes, lbrUncompressDigests, lbrUncompressFileSizes, lbrUncompressModtimes, lbrUncompressCopies, error) - VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?)` + VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?)` } func getEventsStatement() string { @@ -182,7 +183,7 @@ func preparedInsert(logger *logrus.Logger, stmtProcess, stmtTableuse *sqlite3.St rows := 1 err := stmtProcess.Exec( cmd.GetKey(), cmd.LineNo, cmd.Pid, dateStr(cmd.StartTime), dateStr(cmd.EndTime), - float64(cmd.ComputeLapse), float64(cmd.CompletedLapse), + float64(cmd.ComputeLapse), float64(cmd.CompletedLapse), float64(cmd.Paused), string(cmd.User), string(cmd.Workspace), string(cmd.IP), string(cmd.App), string(cmd.Cmd), string(cmd.Args), cmd.UCpu, cmd.SCpu, cmd.DiskIn, cmd.DiskOut, cmd.IpcIn, cmd.IpcOut, cmd.MaxRss, cmd.PageFaults, cmd.MemMB, cmd.MemPeakMB, cmd.RPCMsgsIn, cmd.RPCMsgsOut, @@ -246,7 +247,7 @@ func writeSQLServerEvents(f io.Writer, evt *p4dlog.ServerEvent) int64 { func writeSQL(f io.Writer, cmd *p4dlog.Command) int64 { rows := 1 - fmt.Fprintf(f, `INSERT INTO process VALUES ("%s",%d,%d,"%s","%s",%0.3f,%0.3f,`+ + fmt.Fprintf(f, `INSERT INTO process VALUES ("%s",%d,%d,"%s","%s",%0.3f,%0.3f,%.3f,`+ `"%s","%s","%s","%s","%s","%s",%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,`+ `%.3f,%.3f,%d,%d,%d,%d,%d,%d,`+ `%d,%d,%d,%d,%d,%d,%d,%d,`+ @@ -257,7 +258,7 @@ func writeSQL(f io.Writer, cmd *p4dlog.Command) int64 { `%d,%d,%d,%d,%d,%d,%d,%d,`+ `%d,%d,%d,%d,"%v");`+"\n", cmd.GetKey(), cmd.LineNo, cmd.Pid, dateStr(cmd.StartTime), dateStr(cmd.EndTime), - cmd.ComputeLapse, cmd.CompletedLapse, + cmd.ComputeLapse, cmd.CompletedLapse, cmd.Paused, cmd.User, cmd.Workspace, cmd.IP, cmd.App, cmd.Cmd, cmd.Args, cmd.UCpu, cmd.SCpu, cmd.DiskIn, cmd.DiskOut, cmd.IpcIn, cmd.IpcOut, cmd.MaxRss, cmd.PageFaults, cmd.MemMB, cmd.MemPeakMB, cmd.RPCMsgsIn, cmd.RPCMsgsOut, diff --git a/metrics/metrics.go b/metrics/metrics.go index 2866574..2e871c7 100644 --- a/metrics/metrics.go +++ b/metrics/metrics.go @@ -68,6 +68,7 @@ type P4DMetrics struct { pauseRateMem int64 // ditto cpuPressureState int64 // ditto memPressureState int64 // ditto + cmdsPausedCumulative float64 cmdCounter map[string]int64 cmdErrorCounter map[string]int64 cmdCumulative map[string]float64 @@ -307,6 +308,7 @@ func (p4m *P4DMetrics) getCumulativeMetrics() string { p4m.outputMetric(metrics, "p4_pause_rate_mem", "The (resource pressure) pause rate for Mem", "gauge", fmt.Sprintf("%d", p4m.pauseRateMem), fixedLabels) p4m.outputMetric(metrics, "p4_pause_state_cpu", "The (resource pressure) pause state for CPU (0-2)", "gauge", fmt.Sprintf("%d", p4m.cpuPressureState), fixedLabels) p4m.outputMetric(metrics, "p4_pause_state_mem", "The (resource pressure) pause state for Mem (0-2)", "gauge", fmt.Sprintf("%d", p4m.memPressureState), fixedLabels) + p4m.outputMetric(metrics, "p4_cmds_paused_cumulative", "Total time of commands paused due to resource pressure (seconds)", "counter", fmt.Sprintf("%.3f", p4m.cmdsPausedCumulative), fixedLabels) // Cross platform call - eventually when Windows implemented userCPU, systemCPU := getCPUStats() @@ -535,6 +537,9 @@ func (p4m *P4DMetrics) publishCmdEvent(cmd p4dlog.Command) { if cmd.CmdError { p4m.cmdErrorCounter[cmd.Cmd]++ } + if cmd.Paused > 0.0 { + p4m.cmdsPausedCumulative += float64(cmd.Paused) + } p4m.cmdsRunning = cmd.Running p4m.memMB += cmd.MemMB p4m.memPeakMB += cmd.MemPeakMB diff --git a/metrics/metrics_test.go b/metrics/metrics_test.go index 1bcc20b..f2fa600 100644 --- a/metrics/metrics_test.go +++ b/metrics/metrics_test.go @@ -128,7 +128,7 @@ func filterLines(input []string) []string { for _, line := range input { if !hasPrefix(ignorePrefixes, line) { p := strings.Split(line, " ") - if len(p) <= 1 || p[1] != "0" { + if len(p) <= 1 || (p[1] != "0" && p[1] != "0.000") { result = append(result, line) } } @@ -902,3 +902,57 @@ p4_prom_log_lines_read{serverid="myserverid"} 23 p4_prom_svr_events_processed{serverid="myserverid"} 1`, -1) compareOutput(t, expected, output) } + +func TestServerEventsPausedCumulative(t *testing.T) { + cfg := &Config{ + ServerID: "myserverid", + UpdateInterval: 10 * time.Millisecond, + OutputCmdsByUser: true} + input := ` +Perforce server info: + 2024/06/19 12:25:31 pid 1056864 perforce@ip-10-0-0-106 127.0.0.1 [p4/2024.1.TEST-TEST_ONLY/LINUX26X86_64/2611120] 'user-fstat -Ob //...' + +Perforce server error: + Date 2024/06/19 12:25:31: + Pid 1056860 + Operation: user-fstat + Operation 'user-fstat' failed. + Too many commands paused; terminated. + +Perforce server info: + 2024/06/19 12:25:39 pid 1056864 completed 8.39s 598+67us 304+0io 0+0net 68864k 0pf +Perforce server info: + 2024/06/19 12:25:31 pid 1056864 perforce@ip-10-0-0-106 127.0.0.1 [p4/2024.1.TEST-TEST_ONLY/LINUX26X86_64/2611120] 'user-fstat -Ob //...' +--- lapse 8.39s +--- paused 0.802s +--- usage 598+67us 304+0io 0+0net 68864k 0pf +--- memory cmd/proc 74mb/74mb +--- rpc msgs/size in+out 2+84225/0mb+45mb himarks 795416/795272 snd/rcv 5.64s/.002s + +2024/06/19 12:25:39 004246895 pid 1056103: Server under resource pressure. Pause rate CPU 59%, mem 20%, CPU pressure 2, mem pressure 1 +` + historical := false + output := basicTest(cfg, input, historical) + + expected := eol.Split(`p4_cmd_counter{serverid="myserverid",cmd="user-fstat"} 1 +p4_cmd_cpu_system_cumulative_seconds{serverid="myserverid",cmd="user-fstat"} 0.067 +p4_cmd_cpu_user_cumulative_seconds{serverid="myserverid",cmd="user-fstat"} 0.598 +p4_cmd_cumulative_seconds{serverid="myserverid",cmd="user-fstat"} 8.390 +p4_cmd_mem_mb{serverid="myserverid"} 74 +p4_cmd_mem_peak_mb{serverid="myserverid"} 74 +p4_cmd_program_counter{serverid="myserverid",program="p4/2024.1.TEST-TEST_ONLY/LINUX26X86_64/2611120"} 1 +p4_cmd_program_cumulative_seconds{serverid="myserverid",program="p4/2024.1.TEST-TEST_ONLY/LINUX26X86_64/2611120"} 8.390 +p4_cmd_running{serverid="myserverid"} 1 +p4_cmd_user_counter{serverid="myserverid",user="perforce"} 1 +p4_cmd_user_cumulative_seconds{serverid="myserverid",user="perforce"} 8.390 +p4_cmds_paused_cumulative{serverid="myserverid"} 0.802 +p4_cmds_running{serverid="myserverid"} 1 +p4_pause_rate_cpu{serverid="myserverid"} 59 +p4_pause_rate_mem{serverid="myserverid"} 20 +p4_pause_state_cpu{serverid="myserverid"} 2 +p4_pause_state_mem{serverid="myserverid"} 1 +p4_prom_cmds_processed{serverid="myserverid"} 1 +p4_prom_log_lines_read{serverid="myserverid"} 23 +p4_prom_svr_events_processed{serverid="myserverid"} 1`, -1) + compareOutput(t, expected, output) +}