linera_client/
client_metrics.rs

1// Copyright (c) Zefchain Labs, Inc.
2// SPDX-License-Identifier: Apache-2.0
3
4use hdrhistogram::Histogram;
5use linera_core::client::TimingType;
6use tokio::{sync::mpsc, task, time};
7use tracing::{debug, info, warn};
8
/// Settings for client-side timing collection.
#[derive(Debug, Clone)]
pub struct TimingConfig {
    /// Whether timing collection is enabled (interpreted by the code consuming this config).
    pub enabled: bool,
    /// Interval, in seconds, between two timing reports.
    pub report_interval_secs: u64,
}

#[cfg(not(web))]
impl Default for TimingConfig {
    /// Returns a configuration with timing disabled and a five-second report interval.
    fn default() -> Self {
        const DEFAULT_REPORT_INTERVAL_SECS: u64 = 5;

        TimingConfig {
            report_interval_secs: DEFAULT_REPORT_INTERVAL_SECS,
            enabled: false,
        }
    }
}
24
/// Errors raised while creating or updating the client timing histograms.
#[derive(Debug, thiserror::Error)]
pub enum ClientMetricsError {
    /// Wraps an `hdrhistogram::CreationError`; `#[from]` lets `?` convert it automatically.
    #[error("Failed to create histogram: {0}")]
    HistogramCreationError(#[from] hdrhistogram::CreationError),
    /// Wraps an `hdrhistogram::RecordError`; `#[from]` lets `?` convert it automatically.
    #[error("Failed to record histogram: {0}")]
    HistogramRecordError(#[from] hdrhistogram::RecordError),
}
32
33pub struct ExecuteBlockTimingsHistograms {
34    pub submit_block_proposal_histogram: Histogram<u64>,
35    pub update_validators_histogram: Histogram<u64>,
36}
37
38impl ExecuteBlockTimingsHistograms {
39    pub fn new() -> Result<Self, ClientMetricsError> {
40        Ok(Self {
41            submit_block_proposal_histogram: Histogram::<u64>::new(2)?,
42            update_validators_histogram: Histogram::<u64>::new(2)?,
43        })
44    }
45}
46
47pub struct ExecuteOperationsTimingsHistograms {
48    pub execute_block_histogram: Histogram<u64>,
49    pub execute_block_timings_histograms: ExecuteBlockTimingsHistograms,
50}
51
52impl ExecuteOperationsTimingsHistograms {
53    pub fn new() -> Result<Self, ClientMetricsError> {
54        Ok(Self {
55            execute_block_histogram: Histogram::<u64>::new(2)?,
56            execute_block_timings_histograms: ExecuteBlockTimingsHistograms::new()?,
57        })
58    }
59}
60
61pub struct BlockTimingsHistograms {
62    pub execute_operations_histogram: Histogram<u64>,
63    pub execute_operations_timings_histograms: ExecuteOperationsTimingsHistograms,
64}
65
66impl BlockTimingsHistograms {
67    pub fn new() -> Result<Self, ClientMetricsError> {
68        Ok(Self {
69            execute_operations_histogram: Histogram::<u64>::new(2)?,
70            execute_operations_timings_histograms: ExecuteOperationsTimingsHistograms::new()?,
71        })
72    }
73
74    pub fn record_timing(
75        &mut self,
76        duration_ms: u64,
77        timing_type: TimingType,
78    ) -> Result<(), ClientMetricsError> {
79        match timing_type {
80            TimingType::ExecuteOperations => {
81                self.execute_operations_histogram.record(duration_ms)?;
82            }
83            TimingType::ExecuteBlock => {
84                self.execute_operations_timings_histograms
85                    .execute_block_histogram
86                    .record(duration_ms)?;
87            }
88            TimingType::SubmitBlockProposal => {
89                self.execute_operations_timings_histograms
90                    .execute_block_timings_histograms
91                    .submit_block_proposal_histogram
92                    .record(duration_ms)?;
93            }
94            TimingType::UpdateValidators => {
95                self.execute_operations_timings_histograms
96                    .execute_block_timings_histograms
97                    .update_validators_histogram
98                    .record(duration_ms)?;
99            }
100        }
101        Ok(())
102    }
103}
104
105#[cfg(not(web))]
106pub struct ClientMetrics {
107    pub timing_config: TimingConfig,
108    pub timing_sender: mpsc::UnboundedSender<(u64, TimingType)>,
109    pub timing_task: task::JoinHandle<()>,
110}
111
112#[cfg(not(web))]
113impl ClientMetrics {
114    pub fn new(timing_config: TimingConfig) -> Self {
115        let (tx, rx) = mpsc::unbounded_channel();
116        let timing_task = tokio::spawn(Self::timing_collection(
117            rx,
118            timing_config.report_interval_secs,
119        ));
120
121        Self {
122            timing_config,
123            timing_sender: tx,
124            timing_task,
125        }
126    }
127
128    async fn timing_collection(
129        mut receiver: mpsc::UnboundedReceiver<(u64, TimingType)>,
130        report_interval_secs: u64,
131    ) {
132        let mut histograms =
133            BlockTimingsHistograms::new().expect("Failed to create timing histograms");
134
135        let mut report_needed = false;
136        let mut report_timer = time::interval(time::Duration::from_secs(report_interval_secs));
137        report_timer.set_missed_tick_behavior(time::MissedTickBehavior::Skip);
138
139        loop {
140            tokio::select! {
141                timing_data = receiver.recv() => {
142                    match timing_data {
143                        Some((duration_ms, timing_type)) => {
144                            if let Err(e) = histograms.record_timing(duration_ms, timing_type) {
145                                warn!("Failed to record timing data: {}", e);
146                            } else {
147                                report_needed = true;
148                            }
149                        }
150                        None => {
151                            debug!("Timing collection task shutting down - sender closed");
152                            break;
153                        }
154                    }
155                }
156                _ = report_timer.tick() => {
157                    if report_needed {
158                        Self::print_timing_report(&histograms);
159                        report_needed = false;
160                    }
161                }
162            }
163        }
164    }
165
166    fn print_timing_report(histograms: &BlockTimingsHistograms) {
167        for quantile in [0.99, 0.95, 0.90, 0.50] {
168            let formatted_quantile = (quantile * 100.0) as usize;
169
170            info!(
171                "Execute operations p{}: {} ms",
172                formatted_quantile,
173                histograms
174                    .execute_operations_histogram
175                    .value_at_quantile(quantile)
176            );
177
178            info!(
179                "  └─ Execute block p{}: {} ms",
180                formatted_quantile,
181                histograms
182                    .execute_operations_timings_histograms
183                    .execute_block_histogram
184                    .value_at_quantile(quantile)
185            );
186            info!(
187                "    ├─ Submit block proposal p{}: {} ms",
188                formatted_quantile,
189                histograms
190                    .execute_operations_timings_histograms
191                    .execute_block_timings_histograms
192                    .submit_block_proposal_histogram
193                    .value_at_quantile(quantile)
194            );
195            info!(
196                "    └─ Update validators p{}: {} ms",
197                formatted_quantile,
198                histograms
199                    .execute_operations_timings_histograms
200                    .execute_block_timings_histograms
201                    .update_validators_histogram
202                    .value_at_quantile(quantile)
203            );
204        }
205    }
206}