// linera_client/client_metrics.rs

1// Copyright (c) Zefchain Labs, Inc.
2// SPDX-License-Identifier: Apache-2.0
3
4use hdrhistogram::Histogram;
5use linera_core::client::TimingType;
6use tokio::{sync::mpsc, task, time};
7use tracing::{debug, info, warn};
8
/// Settings for client-side timing collection.
#[derive(Debug, Clone)]
pub struct TimingConfig {
    /// Switch for timing collection. Nothing in this file reads it, so it is
    /// presumably checked by callers before they build the collector.
    pub enabled: bool,
    /// Number of seconds between two timing reports.
    pub report_interval_secs: u64,
}

#[cfg(not(web))]
impl Default for TimingConfig {
    /// Returns a configuration with collection switched off and a five-second
    /// report interval.
    fn default() -> Self {
        let enabled = false;
        let report_interval_secs = 5;
        Self {
            enabled,
            report_interval_secs,
        }
    }
}
24
25#[derive(Debug, thiserror::Error)]
26pub enum ClientMetricsError {
27    #[error("Failed to create histogram: {0}")]
28    HistogramCreationError(#[from] hdrhistogram::CreationError),
29    #[error("Failed to record histogram: {0}")]
30    HistogramRecordError(#[from] hdrhistogram::RecordError),
31}
32
33pub struct ExecuteBlockTimingsHistograms {
34    pub submit_block_proposal_histogram: Histogram<u64>,
35    pub update_validators_histogram: Histogram<u64>,
36}
37
38impl ExecuteBlockTimingsHistograms {
39    pub fn new() -> Result<Self, ClientMetricsError> {
40        Ok(Self {
41            submit_block_proposal_histogram: Histogram::<u64>::new(2)?,
42            update_validators_histogram: Histogram::<u64>::new(2)?,
43        })
44    }
45}
46
47pub struct ExecuteOperationsTimingsHistograms {
48    pub execute_block_histogram: Histogram<u64>,
49    pub execute_block_timings_histograms: ExecuteBlockTimingsHistograms,
50}
51
52impl ExecuteOperationsTimingsHistograms {
53    pub fn new() -> Result<Self, ClientMetricsError> {
54        Ok(Self {
55            execute_block_histogram: Histogram::<u64>::new(2)?,
56            execute_block_timings_histograms: ExecuteBlockTimingsHistograms::new()?,
57        })
58    }
59}
60
61pub struct BlockTimingsHistograms {
62    pub execute_operations_histogram: Histogram<u64>,
63    pub execute_operations_timings_histograms: ExecuteOperationsTimingsHistograms,
64}
65
66impl BlockTimingsHistograms {
67    pub fn new() -> Result<Self, ClientMetricsError> {
68        Ok(Self {
69            execute_operations_histogram: Histogram::<u64>::new(2)?,
70            execute_operations_timings_histograms: ExecuteOperationsTimingsHistograms::new()?,
71        })
72    }
73
74    pub fn record_timing(
75        &mut self,
76        duration_ms: u64,
77        timing_type: TimingType,
78    ) -> Result<(), ClientMetricsError> {
79        match timing_type {
80            TimingType::ExecuteOperations => {
81                self.execute_operations_histogram.record(duration_ms)?;
82            }
83            TimingType::ExecuteBlock => {
84                self.execute_operations_timings_histograms
85                    .execute_block_histogram
86                    .record(duration_ms)?;
87            }
88            TimingType::SubmitBlockProposal => {
89                self.execute_operations_timings_histograms
90                    .execute_block_timings_histograms
91                    .submit_block_proposal_histogram
92                    .record(duration_ms)?;
93            }
94            TimingType::UpdateValidators => {
95                self.execute_operations_timings_histograms
96                    .execute_block_timings_histograms
97                    .update_validators_histogram
98                    .record(duration_ms)?;
99            }
100        }
101        Ok(())
102    }
103}
104
105#[cfg(not(web))]
106pub struct ClientMetrics {
107    pub timing_config: TimingConfig,
108    pub timing_sender: mpsc::UnboundedSender<(u64, TimingType)>,
109    pub timing_task: task::JoinHandle<()>,
110}
111
112#[cfg(not(web))]
113impl ClientMetrics {
114    pub fn new(timing_config: TimingConfig) -> Self {
115        let (tx, rx) = mpsc::unbounded_channel();
116        let timing_task = tokio::spawn(Self::timing_collection(
117            rx,
118            timing_config.report_interval_secs,
119        ));
120
121        Self {
122            timing_config,
123            timing_sender: tx,
124            timing_task,
125        }
126    }
127
128    async fn timing_collection(
129        mut receiver: mpsc::UnboundedReceiver<(u64, TimingType)>,
130        report_interval_secs: u64,
131    ) {
132        let mut histograms =
133            BlockTimingsHistograms::new().expect("Failed to create timing histograms");
134
135        let mut report_needed = false;
136        let mut report_timer = time::interval(time::Duration::from_secs(report_interval_secs));
137        report_timer.set_missed_tick_behavior(time::MissedTickBehavior::Skip);
138
139        loop {
140            tokio::select! {
141                timing_data = receiver.recv() => {
142                    match timing_data {
143                        Some((duration_ms, timing_type)) => {
144                            if let Err(e) = histograms.record_timing(duration_ms, timing_type) {
145                                warn!("Failed to record timing data: {}", e);
146                            } else {
147                                report_needed = true;
148                            }
149                        }
150                        None => {
151                            debug!("Timing collection task shutting down - sender closed");
152                            break;
153                        }
154                    }
155                }
156                _ = report_timer.tick() => {
157                    if report_needed {
158                        Self::print_timing_report(&histograms);
159                        report_needed = false;
160                    }
161                }
162            }
163        }
164    }
165
166    #[expect(
167        clippy::cast_possible_truncation,
168        clippy::cast_sign_loss,
169        reason = "quantile is a fixed small positive value used for display"
170    )]
171    fn print_timing_report(histograms: &BlockTimingsHistograms) {
172        for quantile in [0.99, 0.95, 0.90, 0.50] {
173            let formatted_quantile = (quantile * 100.0) as usize;
174
175            info!(
176                "Execute operations p{}: {} ms",
177                formatted_quantile,
178                histograms
179                    .execute_operations_histogram
180                    .value_at_quantile(quantile)
181            );
182
183            info!(
184                "  └─ Execute block p{}: {} ms",
185                formatted_quantile,
186                histograms
187                    .execute_operations_timings_histograms
188                    .execute_block_histogram
189                    .value_at_quantile(quantile)
190            );
191            info!(
192                "    ├─ Submit block proposal p{}: {} ms",
193                formatted_quantile,
194                histograms
195                    .execute_operations_timings_histograms
196                    .execute_block_timings_histograms
197                    .submit_block_proposal_histogram
198                    .value_at_quantile(quantile)
199            );
200            info!(
201                "    └─ Update validators p{}: {} ms",
202                formatted_quantile,
203                histograms
204                    .execute_operations_timings_histograms
205                    .execute_block_timings_histograms
206                    .update_validators_histogram
207                    .value_at_quantile(quantile)
208            );
209        }
210    }
211}