Skip to main content

gimli/read/
endian_reader.rs

1//! Defining custom `Reader`s quickly.
2
3use alloc::borrow::Cow;
4use alloc::rc::Rc;
5use alloc::string::String;
6use alloc::sync::Arc;
7use core::fmt::Debug;
8use core::hash::{Hash, Hasher};
9use core::ops::{Deref, Index, Range, RangeFrom, RangeTo};
10use core::slice;
11use core::str;
12use stable_deref_trait::CloneStableDeref;
13
14use crate::endianity::Endianity;
15use crate::read::{Error, Reader, ReaderOffsetId, Result};
16
17/// A reference counted, non-thread-safe slice of bytes and associated
18/// endianity.
19///
20/// ```
21/// # #[cfg(feature = "std")] {
22/// use std::rc::Rc;
23///
24/// let buf = Rc::from(&[1, 2, 3, 4][..]);
25/// let reader = gimli::EndianRcSlice::new(buf, gimli::NativeEndian);
26/// # let _ = reader;
27/// # }
28/// ```
29pub type EndianRcSlice<Endian> = EndianReader<Endian, Rc<[u8]>>;
30
31/// An atomically reference counted, thread-safe slice of bytes and associated
32/// endianity.
33///
34/// ```
35/// # #[cfg(feature = "std")] {
36/// use std::sync::Arc;
37///
38/// let buf = Arc::from(&[1, 2, 3, 4][..]);
39/// let reader = gimli::EndianArcSlice::new(buf, gimli::NativeEndian);
40/// # let _ = reader;
41/// # }
42/// ```
43pub type EndianArcSlice<Endian> = EndianReader<Endian, Arc<[u8]>>;
44
45/// An easy way to define a custom `Reader` implementation with a reference to a
46/// generic buffer of bytes and an associated endianity.
47///
48/// Note that the whole original buffer is kept alive in memory even if there is
49/// only one reader that references only a handful of bytes from that original
50/// buffer. That is, `EndianReader` will not do any copying, moving, or
51/// compacting in order to free up unused regions of the original buffer. If you
52/// require this kind of behavior, it is up to you to implement `Reader`
53/// directly by-hand.
54///
55/// # Example
56///
57/// Say you have an `mmap`ed file that you want to serve as a `gimli::Reader`.
58/// You can wrap that `mmap`ed file up in a `MmapFile` type and use
59/// `EndianReader<Rc<MmapFile>>` or `EndianReader<Arc<MmapFile>>` as readers as
60/// long as `MmapFile` dereferences to the underlying `[u8]` data.
61///
62/// ```
63/// use std::io;
64/// use std::ops::Deref;
65/// use std::path::Path;
66/// use std::slice;
67/// use std::sync::Arc;
68///
69/// /// A type that represents an `mmap`ed file.
70/// #[derive(Debug)]
71/// pub struct MmapFile {
72///     ptr: *const u8,
73///     len: usize,
74/// }
75///
76/// impl MmapFile {
77///     pub fn new(path: &Path) -> io::Result<MmapFile> {
78///         // Call `mmap` and check for errors and all that...
79/// #       unimplemented!()
80///     }
81/// }
82///
83/// impl Drop for MmapFile {
84///     fn drop(&mut self) {
85///         // Call `munmap` to clean up after ourselves...
86/// #       unimplemented!()
87///     }
88/// }
89///
90/// // And `MmapFile` can deref to a slice of the `mmap`ed region of memory.
91/// impl Deref for MmapFile {
92///     type Target = [u8];
93///     fn deref(&self) -> &[u8] {
94///         unsafe {
95///             slice::from_raw_parts(self.ptr, self.len)
96///         }
97///     }
98/// }
99///
100/// /// A type that represents a shared `mmap`ed file.
101/// #[derive(Debug, Clone)]
102/// pub struct ArcMmapFile(Arc<MmapFile>);
103///
104/// // And `ArcMmapFile` can deref to a slice of the `mmap`ed region of memory.
105/// impl Deref for ArcMmapFile {
106///     type Target = [u8];
107///     fn deref(&self) -> &[u8] {
108///         &self.0
109///     }
110/// }
111///
112/// // These are both valid for any `Rc` or `Arc`.
113/// unsafe impl gimli::StableDeref for ArcMmapFile {}
114/// unsafe impl gimli::CloneStableDeref for ArcMmapFile {}
115///
116/// /// A `gimli::Reader` that is backed by an `mmap`ed file!
117/// pub type MmapFileReader<Endian> = gimli::EndianReader<Endian, ArcMmapFile>;
118/// # fn test(_: &MmapFileReader<gimli::NativeEndian>) { }
119/// ```
120#[derive(Debug, Clone, Copy)]
121pub struct EndianReader<Endian, T>
122where
123    Endian: Endianity,
124    T: CloneStableDeref<Target = [u8]> + Debug,
125{
126    range: SubRange<T>,
127    endian: Endian,
128}
129
130impl<Endian, T1, T2> PartialEq<EndianReader<Endian, T2>> for EndianReader<Endian, T1>
131where
132    Endian: Endianity,
133    T1: CloneStableDeref<Target = [u8]> + Debug,
134    T2: CloneStableDeref<Target = [u8]> + Debug,
135{
136    fn eq(&self, rhs: &EndianReader<Endian, T2>) -> bool {
137        self.bytes() == rhs.bytes()
138    }
139}
140
141impl<Endian, T> Eq for EndianReader<Endian, T>
142where
143    Endian: Endianity,
144    T: CloneStableDeref<Target = [u8]> + Debug,
145{
146}
147
148impl<Endian, T> Hash for EndianReader<Endian, T>
149where
150    Endian: Endianity,
151    T: CloneStableDeref<Target = [u8]> + Debug,
152{
153    fn hash<H: Hasher>(&self, state: &mut H) {
154        // This must match the `PartialEq` implementation.
155        self.bytes().hash(state);
156    }
157}
158
159// This is separated out from `EndianReader` so that we can avoid running afoul
160// of borrowck. We need to `read_slice(&mut self, ...) -> &[u8]` and then call
161// `self.endian.read_whatever` on the result. The problem is that the returned
162// slice keeps the `&mut self` borrow active, so we wouldn't be able to access
163// `self.endian`. Splitting the sub-range out from the endian lets us work
164// around this, making it so that only the `self.range` borrow is held active,
165// not all of `self`.
166//
167// This also serves to encapsulate the unsafe code concerning `CloneStableDeref`.
168// The `bytes` member is held so that the bytes live long enough, and the
169// `CloneStableDeref` ensures these bytes never move.  The `ptr` and `len`
170// members point inside `bytes`, and are updated during read operations.
171#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
172struct SubRange<T>
173where
174    T: CloneStableDeref<Target = [u8]> + Debug,
175{
176    bytes: T,
177    ptr: *const u8,
178    len: usize,
179}
180
181unsafe impl<T> Send for SubRange<T> where T: CloneStableDeref<Target = [u8]> + Debug + Send {}
182
183unsafe impl<T> Sync for SubRange<T> where T: CloneStableDeref<Target = [u8]> + Debug + Sync {}
184
185impl<T> SubRange<T>
186where
187    T: CloneStableDeref<Target = [u8]> + Debug,
188{
189    #[inline]
190    fn new(bytes: T) -> Self {
191        let ptr = bytes.as_ptr();
192        let len = bytes.len();
193        SubRange { bytes, ptr, len }
194    }
195
196    #[inline]
197    fn bytes(&self) -> &[u8] {
198        // Safe because `T` implements `CloneStableDeref`, `bytes` can't be modified,
199        // and all operations that modify `ptr` and `len` ensure they stay in range.
200        unsafe { slice::from_raw_parts(self.ptr, self.len) }
201    }
202
203    #[inline]
204    fn len(&self) -> usize {
205        self.len
206    }
207
208    #[inline]
209    fn truncate(&mut self, len: usize) {
210        assert!(len <= self.len);
211        self.len = len;
212    }
213
214    #[inline]
215    fn skip(&mut self, len: usize) {
216        assert!(len <= self.len);
217        self.ptr = unsafe { self.ptr.add(len) };
218        self.len -= len;
219    }
220
221    #[inline]
222    fn read_slice(&mut self, len: usize) -> Option<&[u8]> {
223        if self.len() < len {
224            None
225        } else {
226            // Same as for `bytes()`.
227            let bytes = unsafe { slice::from_raw_parts(self.ptr, len) };
228            self.skip(len);
229            Some(bytes)
230        }
231    }
232}
233
234impl<Endian, T> EndianReader<Endian, T>
235where
236    Endian: Endianity,
237    T: CloneStableDeref<Target = [u8]> + Debug,
238{
239    /// Construct a new `EndianReader` with the given bytes.
240    #[inline]
241    pub fn new(bytes: T, endian: Endian) -> EndianReader<Endian, T> {
242        EndianReader {
243            range: SubRange::new(bytes),
244            endian,
245        }
246    }
247
248    /// Return a reference to the raw bytes underlying this reader.
249    #[inline]
250    pub fn bytes(&self) -> &[u8] {
251        self.range.bytes()
252    }
253}
254
255/// # Range Methods
256///
257/// Unfortunately, `std::ops::Index` *must* return a reference, so we can't
258/// implement `Index<Range<usize>>` to return a new `EndianReader` the way we
259/// would like to. Instead, we abandon fancy indexing operators and have these
260/// plain old methods.
261impl<Endian, T> EndianReader<Endian, T>
262where
263    Endian: Endianity,
264    T: CloneStableDeref<Target = [u8]> + Debug,
265{
266    /// Take the given `start..end` range of the underlying buffer and return a
267    /// new `EndianReader`.
268    ///
269    /// ```
270    /// # #[cfg(feature = "std")] {
271    /// use gimli::{EndianReader, LittleEndian};
272    /// use std::sync::Arc;
273    ///
274    /// let buf = Arc::<[u8]>::from(&[0x01, 0x02, 0x03, 0x04][..]);
275    /// let reader = EndianReader::new(buf.clone(), LittleEndian);
276    /// assert_eq!(reader.range(1..3),
277    ///            EndianReader::new(&buf[1..3], LittleEndian));
278    /// # }
279    /// ```
280    ///
281    /// # Panics
282    ///
283    /// Panics if the range is out of bounds.
284    pub fn range(&self, idx: Range<usize>) -> EndianReader<Endian, T> {
285        let mut r = self.clone();
286        r.range.skip(idx.start);
287        r.range.truncate(idx.len());
288        r
289    }
290
291    /// Take the given `start..` range of the underlying buffer and return a new
292    /// `EndianReader`.
293    ///
294    /// ```
295    /// # #[cfg(feature = "std")] {
296    /// use gimli::{EndianReader, LittleEndian};
297    /// use std::sync::Arc;
298    ///
299    /// let buf = Arc::<[u8]>::from(&[0x01, 0x02, 0x03, 0x04][..]);
300    /// let reader = EndianReader::new(buf.clone(), LittleEndian);
301    /// assert_eq!(reader.range_from(2..),
302    ///            EndianReader::new(&buf[2..], LittleEndian));
303    /// # }
304    /// ```
305    ///
306    /// # Panics
307    ///
308    /// Panics if the range is out of bounds.
309    pub fn range_from(&self, idx: RangeFrom<usize>) -> EndianReader<Endian, T> {
310        let mut r = self.clone();
311        r.range.skip(idx.start);
312        r
313    }
314
315    /// Take the given `..end` range of the underlying buffer and return a new
316    /// `EndianReader`.
317    ///
318    /// ```
319    /// # #[cfg(feature = "std")] {
320    /// use gimli::{EndianReader, LittleEndian};
321    /// use std::sync::Arc;
322    ///
323    /// let buf = Arc::<[u8]>::from(&[0x01, 0x02, 0x03, 0x04][..]);
324    /// let reader = EndianReader::new(buf.clone(), LittleEndian);
325    /// assert_eq!(reader.range_to(..3),
326    ///            EndianReader::new(&buf[..3], LittleEndian));
327    /// # }
328    /// ```
329    ///
330    /// # Panics
331    ///
332    /// Panics if the range is out of bounds.
333    pub fn range_to(&self, idx: RangeTo<usize>) -> EndianReader<Endian, T> {
334        let mut r = self.clone();
335        r.range.truncate(idx.end);
336        r
337    }
338}
339
340impl<Endian, T> Index<usize> for EndianReader<Endian, T>
341where
342    Endian: Endianity,
343    T: CloneStableDeref<Target = [u8]> + Debug,
344{
345    type Output = u8;
346    fn index(&self, idx: usize) -> &Self::Output {
347        &self.bytes()[idx]
348    }
349}
350
351impl<Endian, T> Index<RangeFrom<usize>> for EndianReader<Endian, T>
352where
353    Endian: Endianity,
354    T: CloneStableDeref<Target = [u8]> + Debug,
355{
356    type Output = [u8];
357    fn index(&self, idx: RangeFrom<usize>) -> &Self::Output {
358        &self.bytes()[idx]
359    }
360}
361
362impl<Endian, T> Deref for EndianReader<Endian, T>
363where
364    Endian: Endianity,
365    T: CloneStableDeref<Target = [u8]> + Debug,
366{
367    type Target = [u8];
368    fn deref(&self) -> &Self::Target {
369        self.bytes()
370    }
371}
372
373impl<Endian, T> Reader for EndianReader<Endian, T>
374where
375    Endian: Endianity,
376    T: CloneStableDeref<Target = [u8]> + Debug,
377{
378    type Endian = Endian;
379    type Offset = usize;
380
381    #[inline]
382    fn endian(&self) -> Endian {
383        self.endian
384    }
385
386    #[inline]
387    fn len(&self) -> usize {
388        self.range.len()
389    }
390
391    #[inline]
392    fn empty(&mut self) {
393        self.range.truncate(0);
394    }
395
396    #[inline]
397    fn truncate(&mut self, len: usize) -> Result<()> {
398        if self.len() < len {
399            Err(Error::UnexpectedEof(self.offset_id()))
400        } else {
401            self.range.truncate(len);
402            Ok(())
403        }
404    }
405
406    #[inline]
407    fn offset_from(&self, base: &EndianReader<Endian, T>) -> usize {
408        let base_ptr = base.bytes().as_ptr() as usize;
409        let ptr = self.bytes().as_ptr() as usize;
410        debug_assert!(base_ptr <= ptr);
411        debug_assert!(ptr + self.bytes().len() <= base_ptr + base.bytes().len());
412        ptr - base_ptr
413    }
414
415    #[inline]
416    fn offset_id(&self) -> ReaderOffsetId {
417        ReaderOffsetId(self.bytes().as_ptr() as u64)
418    }
419
420    #[inline]
421    fn lookup_offset_id(&self, id: ReaderOffsetId) -> Option<Self::Offset> {
422        let id = id.0;
423        let self_id = self.bytes().as_ptr() as u64;
424        let self_len = self.bytes().len() as u64;
425        if id >= self_id && id <= self_id + self_len {
426            Some((id - self_id) as usize)
427        } else {
428            None
429        }
430    }
431
432    #[inline]
433    fn find(&self, byte: u8) -> Result<usize> {
434        self.bytes()
435            .iter()
436            .position(|x| *x == byte)
437            .ok_or_else(|| Error::UnexpectedEof(self.offset_id()))
438    }
439
440    #[inline]
441    fn skip(&mut self, len: usize) -> Result<()> {
442        if self.len() < len {
443            Err(Error::UnexpectedEof(self.offset_id()))
444        } else {
445            self.range.skip(len);
446            Ok(())
447        }
448    }
449
450    #[inline]
451    fn split(&mut self, len: usize) -> Result<Self> {
452        if self.len() < len {
453            Err(Error::UnexpectedEof(self.offset_id()))
454        } else {
455            let mut r = self.clone();
456            r.range.truncate(len);
457            self.range.skip(len);
458            Ok(r)
459        }
460    }
461
462    #[inline]
463    fn to_slice(&self) -> Result<Cow<'_, [u8]>> {
464        Ok(self.bytes().into())
465    }
466
467    #[inline]
468    fn to_string(&self) -> Result<Cow<'_, str>> {
469        match str::from_utf8(self.bytes()) {
470            Ok(s) => Ok(s.into()),
471            _ => Err(Error::BadUtf8),
472        }
473    }
474
475    #[inline]
476    fn to_string_lossy(&self) -> Result<Cow<'_, str>> {
477        Ok(String::from_utf8_lossy(self.bytes()))
478    }
479
480    #[inline]
481    fn read_slice(&mut self, buf: &mut [u8]) -> Result<()> {
482        match self.range.read_slice(buf.len()) {
483            Some(slice) => {
484                buf.copy_from_slice(slice);
485                Ok(())
486            }
487            None => Err(Error::UnexpectedEof(self.offset_id())),
488        }
489    }
490}
491
#[cfg(test)]
mod tests {
    use super::*;
    use crate::endianity::NativeEndian;
    use crate::read::Reader;

    /// Build a native-endian reader over `bytes`.
    fn native_reader<T>(bytes: T) -> EndianReader<NativeEndian, T>
    where
        T: CloneStableDeref<Target = [u8]> + Debug,
    {
        EndianReader::new(bytes, NativeEndian)
    }

    /// Shared fixture: ten distinct bytes.
    const BUF: &[u8] = &[1, 2, 3, 4, 5, 6, 7, 8, 9, 0];

    #[test]
    fn test_reader_split() {
        let mut tail = native_reader(BUF);
        let head = tail.split(3).unwrap();
        // The split-off part holds the first three bytes; the original keeps
        // the rest.
        assert_eq!(head, native_reader(&BUF[..3]));
        assert_eq!(tail, native_reader(&BUF[3..]));
    }

    #[test]
    fn test_reader_split_out_of_bounds() {
        let mut reader = native_reader(BUF);
        assert!(reader.split(30).is_err());
    }

    #[test]
    fn bytes_and_len_and_range_and_eq() {
        // Whole buffer.
        let whole = native_reader(BUF);
        assert_eq!(whole.len(), BUF.len());
        assert_eq!(whole.bytes(), BUF);
        assert_eq!(whole, native_reader(BUF));

        // `start..end` sub-range.
        let middle = whole.range(2..8);
        let expected_middle = &BUF[2..8];
        assert_eq!(middle.len(), expected_middle.len());
        assert_eq!(middle.bytes(), expected_middle);
        assert_ne!(middle, native_reader(BUF));
        assert_eq!(middle, native_reader(expected_middle));

        // `start..` of the sub-range.
        let tail = middle.range_from(1..);
        let expected_tail = &expected_middle[1..];
        assert_eq!(tail.len(), expected_tail.len());
        assert_eq!(tail.bytes(), expected_tail);
        assert_ne!(tail, native_reader(BUF));
        assert_eq!(tail, native_reader(expected_tail));

        // `..end` of that tail.
        let head = tail.range_to(..4);
        let expected_head = &expected_tail[..4];
        assert_eq!(head.len(), expected_head.len());
        assert_eq!(head.bytes(), expected_head);
        assert_ne!(head, native_reader(BUF));
        assert_eq!(head, native_reader(expected_head));
    }

    #[test]
    fn find() {
        let mut reader = native_reader(BUF);
        reader.skip(2).unwrap();
        // Positions are relative to the reader's current start.
        let expected = BUF[2..].iter().position(|x| *x == 5).unwrap();
        assert_eq!(reader.find(5), Ok(expected));
    }

    #[test]
    fn indexing() {
        let mut reader = native_reader(BUF);
        reader.skip(2).unwrap();
        assert_eq!(reader[0], BUF[2]);
    }

    #[test]
    #[should_panic]
    fn indexing_out_of_bounds() {
        let mut reader = native_reader(BUF);
        reader.skip(2).unwrap();
        let _ = reader[900];
    }

    #[test]
    fn endian() {
        assert_eq!(native_reader(BUF).endian(), NativeEndian);
    }

    #[test]
    fn empty() {
        let mut reader = native_reader(BUF);
        assert!(!reader.is_empty());
        reader.empty();
        assert!(reader.is_empty());
        assert!(reader.bytes().is_empty());
    }

    #[test]
    fn truncate() {
        let mut window = native_reader(BUF).range(2..8);
        window.truncate(2).unwrap();
        assert_eq!(window.bytes(), &BUF[2..4]);
    }

    #[test]
    fn offset_from() {
        let base = native_reader(BUF);
        let inner = base.range(2..8);
        assert_eq!(inner.offset_from(&base), 2);
    }

    #[test]
    fn skip() {
        let mut reader = native_reader(BUF);
        reader.skip(2).unwrap();
        assert_eq!(reader.bytes(), &BUF[2..]);
    }

    #[test]
    fn to_slice() {
        let window = native_reader(BUF).range(2..5);
        assert_eq!(window.to_slice(), Ok(Cow::from(&BUF[2..5])));
    }

    #[test]
    fn to_string_ok() {
        let text = b"hello, world!";
        let tail = native_reader(&text[..]).range_from(7..);
        assert_eq!(tail.to_string(), Ok(Cow::from("world!")));
    }

    // The rocket emoji (🚀 = [0xf0, 0x9f, 0x9a, 0x80]) but rotated left by one
    // to make it invalid UTF-8.
    const BAD_UTF8: &[u8] = &[0x9f, 0x9a, 0x80, 0xf0];

    #[test]
    fn to_string_err() {
        let reader = native_reader(BAD_UTF8);
        assert!(reader.to_string().is_err());
    }

    #[test]
    fn to_string_lossy() {
        let lossy = native_reader(BAD_UTF8);
        assert_eq!(lossy.to_string_lossy(), Ok(Cow::from("����")));
    }

    #[test]
    fn read_u8_array() {
        let mut reader = native_reader(BAD_UTF8);
        reader.skip(1).unwrap();
        let pair: [u8; 2] = reader.read_u8_array().unwrap();
        // Two bytes were consumed after the skipped one.
        assert_eq!(pair, &BAD_UTF8[1..3]);
        assert_eq!(reader.bytes(), &BAD_UTF8[3..]);
    }
}
651}