cranelift_codegen/
timing.rs

1//! Pass timing.
2//!
3//! This modules provides facilities for timing the execution of individual compilation passes.
4
5use core::fmt;
6use std::any::Any;
7use std::boxed::Box;
8use std::cell::RefCell;
9use std::mem;
10use std::time::Duration;
11
12// Each pass that can be timed is predefined with the `define_passes!` macro. Each pass has a
13// snake_case name and a plain text description used when printing out the timing report.
14//
15// This macro defines:
16//
17// - A C-style enum containing all the pass names and a `None` variant.
18// - A usize constant with the number of defined passes.
19// - A const array of pass descriptions.
20// - A public function per pass used to start the timing of that pass.
21macro_rules! define_passes {
22    ($($pass:ident: $desc:expr,)+) => {
23        /// A single profiled pass.
24        #[allow(non_camel_case_types)]
25        #[derive(Clone, Copy, Debug, PartialEq, Eq)]
26        pub enum Pass {
27            $(#[doc=$desc] $pass,)+
28            /// No active pass.
29            None,
30        }
31
32        /// The amount of profiled passes.
33        pub const NUM_PASSES: usize = Pass::None as usize;
34
35        const DESCRIPTIONS: [&str; NUM_PASSES] = [ $($desc),+ ];
36
37        $(
38            #[doc=$desc]
39            #[must_use]
40            pub fn $pass() -> Box<dyn Any> {
41                start_pass(Pass::$pass)
42            }
43        )+
44    }
45}
46
47// Pass definitions.
48define_passes! {
49    // All these are used in other crates but defined here so they appear in the unified
50    // `PassTimes` output.
51    process_file: "Processing test file",
52    parse_text: "Parsing textual Cranelift IR",
53    wasm_translate_module: "Translate WASM module",
54    wasm_translate_function: "Translate WASM function",
55
56    verifier: "Verify Cranelift IR",
57
58    compile: "Compilation passes",
59    try_incremental_cache: "Try loading from incremental cache",
60    store_incremental_cache: "Store in incremental cache",
61    flowgraph: "Control flow graph",
62    domtree: "Dominator tree",
63    loop_analysis: "Loop analysis",
64    preopt: "Pre-legalization rewriting",
65    egraph: "Egraph based optimizations",
66    gvn: "Global value numbering",
67    licm: "Loop invariant code motion",
68    unreachable_code: "Remove unreachable blocks",
69    remove_constant_phis: "Remove constant phi-nodes",
70
71    vcode_lower: "VCode lowering",
72    vcode_emit: "VCode emission",
73    vcode_emit_finish: "VCode emission finalization",
74
75    regalloc: "Register allocation",
76    regalloc_checker: "Register allocation symbolic verification",
77    layout_renumber: "Layout full renumbering",
78
79    canonicalize_nans: "Canonicalization of NaNs",
80}
81
82impl Pass {
83    fn idx(self) -> usize {
84        self as usize
85    }
86
87    /// Description of the pass.
88    pub fn description(self) -> &'static str {
89        match DESCRIPTIONS.get(self.idx()) {
90            Some(s) => s,
91            None => "<no pass>",
92        }
93    }
94}
95
96impl fmt::Display for Pass {
97    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
98        f.write_str(self.description())
99    }
100}
101
102/// A profiler.
103pub trait Profiler {
104    /// Start a profiling pass.
105    ///
106    /// Will return a token which when dropped indicates the end of the pass.
107    ///
108    /// Multiple passes can be active at the same time, but they must be started and stopped in a
109    /// LIFO fashion.
110    fn start_pass(&self, pass: Pass) -> Box<dyn Any>;
111}
112
113// Information about passes in a single thread.
114thread_local! {
115    static PROFILER: RefCell<Box<dyn Profiler>> = RefCell::new(Box::new(DefaultProfiler));
116}
117
118/// Set the profiler for the current thread.
119///
120/// Returns the old profiler.
121pub fn set_thread_profiler(new_profiler: Box<dyn Profiler>) -> Box<dyn Profiler> {
122    PROFILER.with(|profiler| std::mem::replace(&mut *profiler.borrow_mut(), new_profiler))
123}
124
125/// Start timing `pass` as a child of the currently running pass, if any.
126///
127/// This function is called by the publicly exposed pass functions.
128fn start_pass(pass: Pass) -> Box<dyn Any> {
129    PROFILER.with(|profiler| profiler.borrow().start_pass(pass))
130}
131
132/// Accumulated timing information for a single pass.
133#[derive(Default, Copy, Clone)]
134struct PassTime {
135    /// Total time spent running this pass including children.
136    total: Duration,
137
138    /// Time spent running in child passes.
139    child: Duration,
140}
141
142/// Accumulated timing for all passes.
143pub struct PassTimes {
144    pass: [PassTime; NUM_PASSES],
145}
146
147impl PassTimes {
148    /// Add `other` to the timings of this `PassTimes`.
149    pub fn add(&mut self, other: &Self) {
150        for (a, b) in self.pass.iter_mut().zip(&other.pass[..]) {
151            a.total += b.total;
152            a.child += b.child;
153        }
154    }
155
156    /// Returns the total amount of time taken by all the passes measured.
157    pub fn total(&self) -> Duration {
158        self.pass.iter().map(|p| p.total - p.child).sum()
159    }
160}
161
162impl Default for PassTimes {
163    fn default() -> Self {
164        Self {
165            pass: [Default::default(); NUM_PASSES],
166        }
167    }
168}
169
170impl fmt::Display for PassTimes {
171    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
172        writeln!(f, "======== ========  ==================================")?;
173        writeln!(f, "   Total     Self  Pass")?;
174        writeln!(f, "-------- --------  ----------------------------------")?;
175        for (time, desc) in self.pass.iter().zip(&DESCRIPTIONS[..]) {
176            // Omit passes that haven't run.
177            if time.total == Duration::default() {
178                continue;
179            }
180
181            // Write a duration as secs.millis, trailing space.
182            fn fmtdur(mut dur: Duration, f: &mut fmt::Formatter) -> fmt::Result {
183                // Round to nearest ms by adding 500us.
184                dur += Duration::new(0, 500_000);
185                let ms = dur.subsec_millis();
186                write!(f, "{:4}.{:03} ", dur.as_secs(), ms)
187            }
188
189            fmtdur(time.total, f)?;
190            if let Some(s) = time.total.checked_sub(time.child) {
191                fmtdur(s, f)?;
192            }
193            writeln!(f, " {desc}")?;
194        }
195        writeln!(f, "======== ========  ==================================")
196    }
197}
198
199// Information about passes in a single thread.
200thread_local! {
201    static PASS_TIME: RefCell<PassTimes> = RefCell::new(Default::default());
202}
203
204/// The default profiler. You can get the results using [`take_current`].
205pub struct DefaultProfiler;
206
207/// Take the current accumulated pass timings and reset the timings for the current thread.
208///
209/// Only applies when [`DefaultProfiler`] is used.
210pub fn take_current() -> PassTimes {
211    PASS_TIME.with(|rc| mem::take(&mut *rc.borrow_mut()))
212}
213
214#[cfg(feature = "timing")]
215mod enabled {
216    use super::{DefaultProfiler, Pass, Profiler, PASS_TIME};
217    use std::any::Any;
218    use std::boxed::Box;
219    use std::cell::Cell;
220    use std::time::Instant;
221
222    // Information about passes in a single thread.
223    thread_local! {
224        static CURRENT_PASS: Cell<Pass> = const { Cell::new(Pass::None) };
225    }
226
227    impl Profiler for DefaultProfiler {
228        fn start_pass(&self, pass: Pass) -> Box<dyn Any> {
229            let prev = CURRENT_PASS.with(|p| p.replace(pass));
230            log::debug!("timing: Starting {}, (during {})", pass, prev);
231            Box::new(DefaultTimingToken {
232                start: Instant::now(),
233                pass,
234                prev,
235            })
236        }
237    }
238
239    /// A timing token is responsible for timing the currently running pass. Timing starts when it
240    /// is created and ends when it is dropped.
241    ///
242    /// Multiple passes can be active at the same time, but they must be started and stopped in a
243    /// LIFO fashion.
244    struct DefaultTimingToken {
245        /// Start time for this pass.
246        start: Instant,
247
248        // Pass being timed by this token.
249        pass: Pass,
250
251        // The previously active pass which will be restored when this token is dropped.
252        prev: Pass,
253    }
254
255    /// Dropping a timing token indicated the end of the pass.
256    impl Drop for DefaultTimingToken {
257        fn drop(&mut self) {
258            let duration = self.start.elapsed();
259            log::debug!("timing: Ending {}: {}ms", self.pass, duration.as_millis());
260            let old_cur = CURRENT_PASS.with(|p| p.replace(self.prev));
261            debug_assert_eq!(self.pass, old_cur, "Timing tokens dropped out of order");
262            PASS_TIME.with(|rc| {
263                let mut table = rc.borrow_mut();
264                table.pass[self.pass.idx()].total += duration;
265                if let Some(parent) = table.pass.get_mut(self.prev.idx()) {
266                    parent.child += duration;
267                }
268            })
269        }
270    }
271}
272
273#[cfg(not(feature = "timing"))]
274mod disabled {
275    use super::{DefaultProfiler, Pass, Profiler};
276    use std::any::Any;
277    use std::boxed::Box;
278
279    impl Profiler for DefaultProfiler {
280        fn start_pass(&self, _pass: Pass) -> Box<dyn Any> {
281            Box::new(())
282        }
283    }
284}
285
286#[cfg(test)]
287mod tests {
288    use super::*;
289    use alloc::string::ToString;
290
291    #[test]
292    fn display() {
293        assert_eq!(Pass::None.to_string(), "<no pass>");
294        assert_eq!(Pass::regalloc.to_string(), "Register allocation");
295    }
296}