Skip to content

Commit

Permalink
Pass ProfileDataCollector to ReportWriter
Browse files Browse the repository at this point in the history
  • Loading branch information
nv-hwoo committed Jan 25, 2024
1 parent 4489d3a commit d86e830
Show file tree
Hide file tree
Showing 5 changed files with 31 additions and 24 deletions.
8 changes: 4 additions & 4 deletions src/c++/perf_analyzer/perf_analyzer.cc
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// Copyright 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// Copyright 2022-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
Expand Down Expand Up @@ -428,8 +428,8 @@ PerfAnalyzer::WriteReport()
bool should_output_metrics{
params_->should_collect_metrics && params_->verbose_csv};

// (TMA-1526) Detect if the model is LLM and report LLM metrics based on that
// signal. Currently we just check if it's decoupled model.
// TODO (TMA-1557): Detect if the model is LLM and report LLM metrics based
// on that signal. Currently we simply check if it's a decoupled model.
bool should_output_llm_metrics{
parser_->IsDecoupled() && !params_->profile_export_file.empty()};

Expand All @@ -440,7 +440,7 @@ PerfAnalyzer::WriteReport()
params_->filename, params_->targeting_concurrency(), perf_statuses_,
params_->verbose_csv, profiler_->IncludeServerStats(),
params_->percentile, parser_, &writer, should_output_metrics,
collector_->GetData(), should_output_llm_metrics),
collector_, should_output_llm_metrics),
"failed to create report writer");

writer->GenerateReport();
Expand Down
4 changes: 3 additions & 1 deletion src/c++/perf_analyzer/profile_data_collector.h
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// Copyright 2022-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
Expand Down Expand Up @@ -101,6 +101,8 @@ class ProfileDataCollector {

// Returns a mutable reference to the stored version string (version_).
std::string& GetVersion() { return version_; }

// Returns true when experiments_ holds no entries.
// NOTE(review): this only reads experiments_; consider const-qualifying it.
bool IsEmpty() { return experiments_.empty(); }

private:
ProfileDataCollector() = default;

Expand Down
15 changes: 8 additions & 7 deletions src/c++/perf_analyzer/report_writer.cc
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// Copyright 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// Copyright 2022-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
Expand Down Expand Up @@ -42,12 +42,12 @@ ReportWriter::Create(
const bool include_server_stats, const int32_t percentile,
const std::shared_ptr<ModelParser>& parser,
std::unique_ptr<ReportWriter>* writer, const bool should_output_metrics,
const std::vector<Experiment>& experiments,
const std::shared_ptr<ProfileDataCollector>& collector,
const bool should_output_llm_metrics)
{
std::unique_ptr<ReportWriter> local_writer(new ReportWriter(
filename, target_concurrency, summary, verbose_csv, include_server_stats,
percentile, parser, should_output_metrics, experiments,
percentile, parser, should_output_metrics, collector,
should_output_llm_metrics));

*writer = std::move(local_writer);
Expand All @@ -61,13 +61,13 @@ ReportWriter::ReportWriter(
const bool include_server_stats, const int32_t percentile,
const std::shared_ptr<ModelParser>& parser,
const bool should_output_metrics,
const std::vector<Experiment>& experiments,
const std::shared_ptr<ProfileDataCollector>& collector,
const bool should_output_llm_metrics)
: filename_(filename), target_concurrency_(target_concurrency),
summary_(summary), verbose_csv_(verbose_csv),
include_server_stats_(include_server_stats), percentile_(percentile),
parser_(parser), should_output_metrics_(should_output_metrics),
experiments_(experiments),
collector_(collector),
should_output_llm_metrics_(should_output_llm_metrics)
{
}
Expand Down Expand Up @@ -247,7 +247,7 @@ ReportWriter::GenerateReport()
}
}
if (should_output_llm_metrics_) {
if (experiments_.empty()) {
if (collector_->IsEmpty()) {
throw PerfAnalyzerException(
"Attempted to write LLM metrics when profile data is empty.",
GENERIC_ERROR);
Expand Down Expand Up @@ -410,10 +410,11 @@ ReportWriter::WriteGpuMetrics(std::ostream& ofs, const Metrics& metric)
void
ReportWriter::WriteLlmMetrics(std::ostream& ofs)
{
const std::vector<Experiment>& experiments{collector_->GetData()};
std::vector<double> first_token_latencies;
std::vector<double> t2t_latencies;

for (const auto& exp : experiments_) {
for (const auto& exp : experiments) {
for (const auto& req : exp.requests) {
for (size_t i = 0; i < req.response_times_.size(); i++) {
if (i <= 0) {
Expand Down
8 changes: 4 additions & 4 deletions src/c++/perf_analyzer/report_writer.h
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// Copyright 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// Copyright 2022-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
Expand Down Expand Up @@ -70,7 +70,7 @@ class ReportWriter {
const bool include_server_stats, const int32_t percentile,
const std::shared_ptr<ModelParser>& parser,
std::unique_ptr<ReportWriter>* writer, const bool should_output_metrics,
const std::vector<Experiment>& experiments,
const std::shared_ptr<ProfileDataCollector>& collector,
const bool should_output_llm_metrics);

void GenerateReport();
Expand All @@ -92,7 +92,7 @@ class ReportWriter {
const bool include_server_stats, const int32_t percentile,
const std::shared_ptr<ModelParser>& parser,
const bool should_output_metrics,
const std::vector<Experiment>& experiments,
const std::shared_ptr<ProfileDataCollector>& collector,
const bool should_output_llm_metrics);


Expand All @@ -104,7 +104,7 @@ class ReportWriter {
std::vector<pa::PerfStatus> summary_{};
const std::shared_ptr<ModelParser>& parser_{nullptr};
const bool should_output_metrics_{false};
const std::vector<Experiment> experiments_{};
const std::shared_ptr<ProfileDataCollector>& collector_{nullptr};
const bool should_output_llm_metrics_{false};

#ifndef DOCTEST_CONFIG_DISABLE
Expand Down
20 changes: 12 additions & 8 deletions src/c++/perf_analyzer/test_report_writer.cc
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// Copyright 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// Copyright 2022-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
Expand Down Expand Up @@ -35,10 +35,10 @@ namespace triton { namespace perfanalyzer {
class TestReportWriter : ReportWriter {
public:
TestReportWriter() = default;
TestReportWriter(std::vector<Experiment>& experiments)
TestReportWriter(const std::shared_ptr<ProfileDataCollector>& collector)
: ReportWriter(
"", false, std::vector<pa::PerfStatus>{}, false, false, 0, nullptr,
false, experiments, true)
false, collector, true)
{
}
void WriteGpuMetrics(std::ostream& ofs, const Metrics& metrics)
Expand Down Expand Up @@ -105,11 +105,16 @@ TEST_CASE("testing WriteGpuMetrics")

TEST_CASE("report_writer: WriteLlmMetrics")
{
std::shared_ptr<ProfileDataCollector> collector;
CHECK_NOTHROW_MESSAGE(
pa::ProfileDataCollector::Create(&collector),
"failed to create profile data collector");

InferenceLoadMode infer_mode{10, 20.0}; // dummy values

// Create dummy request records
using std::chrono::system_clock;
using std::chrono::time_point;

Experiment experiment;
auto clock_epoch{time_point<system_clock>()};

uint64_t seq_id1{123};
Expand All @@ -133,16 +138,15 @@ TEST_CASE("report_writer: WriteLlmMetrics")
seq_id2, false};

std::vector<RequestRecord> request_records{rr1, rr2};
experiment.requests = std::move(request_records);
std::vector<Experiment> experiments{experiment};
collector->AddData(infer_mode, std::move(request_records));

// Avg first token latency
// = ((response1 - request1) + (response3 - request2)) / 2
// = (3 + 1) / 2 = 2 us
// Avg token-to-token latency
// = ((response2 - response1) + (response4 - response3)) / 2
// = (1 + 2) / 2 = 1.5 us
TestReportWriter trw(experiments);
TestReportWriter trw(collector);
std::ostringstream actual_output{};
trw.WriteLlmMetrics(actual_output);
const std::string expected_output{",2,1.5"};
Expand Down

0 comments on commit d86e830

Please sign in to comment.