Pass ProfileDataCollector to ReportWriter

triton-inference-server · Jan 25, 2024 · d86e830
1 parent 4489d3a
commit d86e830
Show file tree

Hide file tree

Showing 5 changed files with 31 additions and 24 deletions.
diff --git a/src/c++/perf_analyzer/perf_analyzer.cc b/src/c++/perf_analyzer/perf_analyzer.cc
@@ -1,4 +1,4 @@
-// Copyright 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// Copyright 2022-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 //
 // Redistribution and use in source and binary forms, with or without
 // modification, are permitted provided that the following conditions
@@ -428,8 +428,8 @@ PerfAnalyzer::WriteReport()
   bool should_output_metrics{
       params_->should_collect_metrics && params_->verbose_csv};
 
-  // (TMA-1526) Detect if the model is LLM and report LLM metrics based on that
-  // signal. Currently we just check if it's decoupled model.
+  // TODO (TMA-1557): Detect if the model is LLM and report LLM metrics based
+  // on that signal. Currently we simply check if it's a decoupled model.
   bool should_output_llm_metrics{
       parser_->IsDecoupled() && !params_->profile_export_file.empty()};
 
@@ -440,7 +440,7 @@ PerfAnalyzer::WriteReport()
           params_->filename, params_->targeting_concurrency(), perf_statuses_,
           params_->verbose_csv, profiler_->IncludeServerStats(),
           params_->percentile, parser_, &writer, should_output_metrics,
-          collector_->GetData(), should_output_llm_metrics),
+          collector_, should_output_llm_metrics),
       "failed to create report writer");
 
   writer->GenerateReport();

diff --git a/src/c++/perf_analyzer/profile_data_collector.h b/src/c++/perf_analyzer/profile_data_collector.h
@@ -1,4 +1,4 @@
-// Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// Copyright 2022-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 //
 // Redistribution and use in source and binary forms, with or without
 // modification, are permitted provided that the following conditions
@@ -101,6 +101,8 @@ class ProfileDataCollector {
 
   std::string& GetVersion() { return version_; }
 
+  bool IsEmpty() { return experiments_.empty(); }
+
  private:
   ProfileDataCollector() = default;
 

diff --git a/src/c++/perf_analyzer/report_writer.cc b/src/c++/perf_analyzer/report_writer.cc
@@ -1,4 +1,4 @@
-// Copyright 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// Copyright 2022-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 //
 // Redistribution and use in source and binary forms, with or without
 // modification, are permitted provided that the following conditions
@@ -42,12 +42,12 @@ ReportWriter::Create(
     const bool include_server_stats, const int32_t percentile,
     const std::shared_ptr<ModelParser>& parser,
     std::unique_ptr<ReportWriter>* writer, const bool should_output_metrics,
-    const std::vector<Experiment>& experiments,
+    const std::shared_ptr<ProfileDataCollector>& collector,
     const bool should_output_llm_metrics)
 {
   std::unique_ptr<ReportWriter> local_writer(new ReportWriter(
       filename, target_concurrency, summary, verbose_csv, include_server_stats,
-      percentile, parser, should_output_metrics, experiments,
+      percentile, parser, should_output_metrics, collector,
       should_output_llm_metrics));
 
   *writer = std::move(local_writer);
@@ -61,13 +61,13 @@ ReportWriter::ReportWriter(
     const bool include_server_stats, const int32_t percentile,
     const std::shared_ptr<ModelParser>& parser,
     const bool should_output_metrics,
-    const std::vector<Experiment>& experiments,
+    const std::shared_ptr<ProfileDataCollector>& collector,
     const bool should_output_llm_metrics)
     : filename_(filename), target_concurrency_(target_concurrency),
       summary_(summary), verbose_csv_(verbose_csv),
       include_server_stats_(include_server_stats), percentile_(percentile),
       parser_(parser), should_output_metrics_(should_output_metrics),
-      experiments_(experiments),
+      collector_(collector),
       should_output_llm_metrics_(should_output_llm_metrics)
 {
 }
@@ -247,7 +247,7 @@ ReportWriter::GenerateReport()
         }
       }
       if (should_output_llm_metrics_) {
-        if (experiments_.empty()) {
+        if (collector_->IsEmpty()) {
           throw PerfAnalyzerException(
               "Attempted to write LLM metrics when profile data is empty.",
               GENERIC_ERROR);
@@ -410,10 +410,11 @@ ReportWriter::WriteGpuMetrics(std::ostream& ofs, const Metrics& metric)
 void
 ReportWriter::WriteLlmMetrics(std::ostream& ofs)
 {
+  const std::vector<Experiment>& experiments{collector_->GetData()};
   std::vector<double> first_token_latencies;
   std::vector<double> t2t_latencies;
 
-  for (const auto& exp : experiments_) {
+  for (const auto& exp : experiments) {
     for (const auto& req : exp.requests) {
       for (size_t i = 0; i < req.response_times_.size(); i++) {
         if (i <= 0) {

diff --git a/src/c++/perf_analyzer/report_writer.h b/src/c++/perf_analyzer/report_writer.h
@@ -1,4 +1,4 @@
-// Copyright 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// Copyright 2022-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 //
 // Redistribution and use in source and binary forms, with or without
 // modification, are permitted provided that the following conditions
@@ -70,7 +70,7 @@ class ReportWriter {
       const bool include_server_stats, const int32_t percentile,
       const std::shared_ptr<ModelParser>& parser,
       std::unique_ptr<ReportWriter>* writer, const bool should_output_metrics,
-      const std::vector<Experiment>& experiments,
+      const std::shared_ptr<ProfileDataCollector>& collector,
       const bool should_output_llm_metrics);
 
   void GenerateReport();
@@ -92,7 +92,7 @@ class ReportWriter {
       const bool include_server_stats, const int32_t percentile,
       const std::shared_ptr<ModelParser>& parser,
       const bool should_output_metrics,
-      const std::vector<Experiment>& experiments,
+      const std::shared_ptr<ProfileDataCollector>& collector,
       const bool should_output_llm_metrics);
 
 
@@ -104,7 +104,7 @@ class ReportWriter {
   std::vector<pa::PerfStatus> summary_{};
   const std::shared_ptr<ModelParser>& parser_{nullptr};
   const bool should_output_metrics_{false};
-  const std::vector<Experiment> experiments_{};
+  const std::shared_ptr<ProfileDataCollector>& collector_{nullptr};
   const bool should_output_llm_metrics_{false};
 
 #ifndef DOCTEST_CONFIG_DISABLE

diff --git a/src/c++/perf_analyzer/test_report_writer.cc b/src/c++/perf_analyzer/test_report_writer.cc
@@ -1,4 +1,4 @@
-// Copyright 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// Copyright 2022-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 //
 // Redistribution and use in source and binary forms, with or without
 // modification, are permitted provided that the following conditions
@@ -35,10 +35,10 @@ namespace triton { namespace perfanalyzer {
 class TestReportWriter : ReportWriter {
  public:
   TestReportWriter() = default;
-  TestReportWriter(std::vector<Experiment>& experiments)
+  TestReportWriter(const std::shared_ptr<ProfileDataCollector>& collector)
       : ReportWriter(
             "", false, std::vector<pa::PerfStatus>{}, false, false, 0, nullptr,
-            false, experiments, true)
+            false, collector, true)
   {
   }
   void WriteGpuMetrics(std::ostream& ofs, const Metrics& metrics)
@@ -105,11 +105,16 @@ TEST_CASE("testing WriteGpuMetrics")
 
 TEST_CASE("report_writer: WriteLlmMetrics")
 {
+  std::shared_ptr<ProfileDataCollector> collector;
+  CHECK_NOTHROW_MESSAGE(
+      pa::ProfileDataCollector::Create(&collector),
+      "failed to create profile data collector");
+
+  InferenceLoadMode infer_mode{10, 20.0};  // dummy values
+
   // Create dummy request records
   using std::chrono::system_clock;
   using std::chrono::time_point;
-
-  Experiment experiment;
   auto clock_epoch{time_point<system_clock>()};
 
   uint64_t seq_id1{123};
@@ -133,16 +138,15 @@ TEST_CASE("report_writer: WriteLlmMetrics")
       seq_id2,  false};
 
   std::vector<RequestRecord> request_records{rr1, rr2};
-  experiment.requests = std::move(request_records);
-  std::vector<Experiment> experiments{experiment};
+  collector->AddData(infer_mode, std::move(request_records));
 
   // Avg first token latency
   // = ((response1 - request1) + (response3 - request2)) / 2
   // = (3 + 1) / 2 = 2 us
   // Avg token-to-token latency
   // = ((response2 - response1) + (response4 - response3)) / 2
   // = (1 + 2) / 2 = 1.5 us
-  TestReportWriter trw(experiments);
+  TestReportWriter trw(collector);
   std::ostringstream actual_output{};
   trw.WriteLlmMetrics(actual_output);
   const std::string expected_output{",2,1.5"};