linera_service/tracing/
opentelemetry.rs

1// Copyright (c) Zefchain Labs, Inc.
2// SPDX-License-Identifier: Apache-2.0
3
4//! OpenTelemetry integration for tracing with OTLP export.
5
6use opentelemetry::{global, propagation::TextMapCompositePropagator, trace::TracerProvider};
7use opentelemetry_otlp::{SpanExporter, WithExportConfig};
8#[cfg(with_testing)]
9use opentelemetry_sdk::trace::InMemorySpanExporter;
10use opentelemetry_sdk::{
11    propagation::{BaggagePropagator, TraceContextPropagator},
12    trace::{BatchSpanProcessor, SdkTracerProvider},
13    Resource,
14};
15use tracing_opentelemetry::OpenTelemetryLayer;
16use tracing_subscriber::{
17    filter::{filter_fn, FilterFn},
18    layer::Layer,
19    prelude::__tracing_subscriber_SubscriberExt as _,
20    util::SubscriberInitExt,
21};
22
23/// Creates a filter that excludes spans with the `opentelemetry.skip` field.
24///
25/// Any span that declares an `opentelemetry.skip` field will be excluded from export,
26/// regardless of the field's value. This is a limitation of the tracing metadata API.
27///
28/// Usage examples:
29/// ```ignore
30/// // Always skip this span
31/// #[tracing::instrument(fields(opentelemetry.skip = true))]
32/// fn internal_helper() { }
33///
34/// // Conditionally skip based on a parameter
35/// #[tracing::instrument(fields(opentelemetry.skip = should_skip))]
36/// fn my_function(should_skip: bool) {
37///     // Will be skipped if should_skip is true when called
38///     // Note: The field must be declared in the span, so the span is
39///     // created with knowledge that it might be skipped
40/// }
41/// ```
42fn opentelemetry_skip_filter() -> FilterFn<impl Fn(&tracing::Metadata<'_>) -> bool> {
43    filter_fn(|metadata| {
44        if !metadata.is_span() {
45            return false;
46        }
47        metadata.fields().field("opentelemetry.skip").is_none()
48    })
49}
50
51/// Initializes tracing with a custom OpenTelemetry tracer provider.
52///
53/// This is an internal function used by both production and test code.
54fn init_with_tracer_provider(log_name: &str, tracer_provider: &SdkTracerProvider) {
55    global::set_tracer_provider(tracer_provider.clone());
56    let tracer = tracer_provider.tracer("linera");
57
58    let opentelemetry_layer =
59        OpenTelemetryLayer::new(tracer).with_filter(opentelemetry_skip_filter());
60
61    let config = crate::tracing::get_env_config(log_name);
62    let maybe_log_file_layer = config.maybe_log_file_layer();
63    let stderr_layer = config.stderr_layer();
64
65    tracing_subscriber::registry()
66        .with(opentelemetry_layer)
67        .with(config.env_filter)
68        .with(maybe_log_file_layer)
69        .with(stderr_layer)
70        .init();
71}
72
/// Creates an OpenTelemetry layer backed by an in-memory span exporter, for tests.
///
/// Unlike the production initialization path, this function does not install a
/// subscriber; the caller attaches the returned layer to a registry of its own.
/// It does, however, register the freshly built provider as the global tracer
/// provider.
///
/// The provider uses `log_name` as its service name, exports through a simple
/// (non-batching) exporter, and samples every span.
///
/// Returns a tuple of:
/// - the OpenTelemetry layer, already wrapped in the `opentelemetry.skip` filter,
/// - the in-memory exporter, for inspecting what was exported,
/// - the tracer provider, which the caller must keep alive and shut down.
#[cfg(with_testing)]
pub fn build_opentelemetry_layer_with_test_exporter(
    log_name: &str,
) -> (
    impl tracing_subscriber::Layer<tracing_subscriber::Registry>,
    InMemorySpanExporter,
    SdkTracerProvider,
) {
    let span_sink = InMemorySpanExporter::default();

    // The provider gets one handle to the exporter; the other is returned to the caller.
    let provider = SdkTracerProvider::builder()
        .with_resource(
            Resource::builder()
                .with_service_name(log_name.to_string())
                .build(),
        )
        .with_simple_exporter(span_sink.clone())
        .with_sampler(opentelemetry_sdk::trace::Sampler::AlwaysOn)
        .build();

    global::set_tracer_provider(provider.clone());

    let layer =
        OpenTelemetryLayer::new(provider.tracer("linera")).with_filter(opentelemetry_skip_filter());

    (layer, span_sink, provider)
}
105
106/// Sets up the global text map propagator with TraceContext and Baggage support.
107///
108/// This enables:
109/// - W3C TraceContext propagation (traceparent, tracestate headers)
110/// - W3C Baggage propagation (baggage header for traffic_type, etc.)
111fn setup_propagator() {
112    let propagator = TextMapCompositePropagator::new(vec![
113        Box::new(TraceContextPropagator::new()),
114        Box::new(BaggagePropagator::new()),
115    ]);
116    global::set_text_map_propagator(propagator);
117}
118
119/// Initializes tracing with OpenTelemetry OTLP exporter.
120///
121/// Exports traces using the OTLP protocol to any OpenTelemetry-compatible backend.
122/// Requires the `opentelemetry` feature.
123/// Only enables OpenTelemetry if LINERA_OTLP_EXPORTER_ENDPOINT env var is set.
124/// This prevents DNS errors in environments where OpenTelemetry is not deployed.
125pub fn init(log_name: &str, otlp_endpoint: Option<&str>) {
126    // Set up composite propagator for TraceContext and Baggage
127    setup_propagator();
128
129    // Check if OpenTelemetry endpoint is configured via parameter or env var
130    let endpoint = match otlp_endpoint {
131        Some(ep) if !ep.is_empty() => ep.to_string(),
132        _ => match std::env::var("LINERA_OTLP_EXPORTER_ENDPOINT") {
133            Ok(ep) if !ep.is_empty() => ep,
134            _ => {
135                crate::tracing::init(log_name);
136                tracing::warn!(
137                    "LINERA_OTLP_EXPORTER_ENDPOINT not set and no endpoint provided. \
138                     Falling back to standard tracing without OpenTelemetry span export. \
139                     Baggage propagation is still enabled."
140                );
141                return;
142            }
143        },
144    };
145
146    let resource = Resource::builder()
147        .with_service_name(log_name.to_string())
148        .build();
149
150    let exporter = SpanExporter::builder()
151        .with_tonic()
152        .with_endpoint(endpoint)
153        .build()
154        .expect("Failed to create OTLP exporter");
155
156    // Configure batch processor for high-throughput scenarios
157    // Larger queue (16k instead of 2k default) to handle benchmark load
158    // Faster export (100ms instead of 5s default) to prevent queue buildup
159    let batch_config = opentelemetry_sdk::trace::BatchConfigBuilder::default()
160        .with_max_queue_size(16384) // 8x default, enough for 8 shards under load
161        .with_max_export_batch_size(2048) // Larger batches for efficiency
162        .with_scheduled_delay(std::time::Duration::from_millis(100)) // Fast export to prevent queue buildup
163        .build();
164
165    let batch_processor = BatchSpanProcessor::new(exporter, batch_config);
166
167    let tracer_provider = SdkTracerProvider::builder()
168        .with_resource(resource)
169        .with_span_processor(batch_processor)
170        .with_sampler(opentelemetry_sdk::trace::Sampler::AlwaysOn)
171        .build();
172
173    init_with_tracer_provider(log_name, &tracer_provider);
174}