object/write/
string.rs

1use alloc::vec::Vec;
2
3#[cfg(feature = "write_std")]
4type IndexSet<K> = indexmap::IndexSet<K>;
5#[cfg(not(feature = "write_std"))]
6type IndexSet<K> = indexmap::IndexSet<K, hashbrown::DefaultHashBuilder>;
7
8/// An identifier for an entry in a string table.
9#[derive(Debug, Clone, Copy, PartialEq, Eq)]
10pub struct StringId(usize);
11
12#[derive(Debug, Default)]
13pub(crate) struct StringTable<'a> {
14    strings: IndexSet<&'a [u8]>,
15    offsets: Vec<usize>,
16}
17
18impl<'a> StringTable<'a> {
19    /// Add a string to the string table.
20    ///
21    /// Panics if the string table has already been written, or
22    /// if the string contains a null byte.
23    pub fn add(&mut self, string: &'a [u8]) -> StringId {
24        assert!(self.offsets.is_empty());
25        assert!(!string.contains(&0));
26        let id = self.strings.insert_full(string).0;
27        StringId(id)
28    }
29
30    /// Return the id of the given string.
31    ///
32    /// Panics if the string is not in the string table.
33    #[allow(dead_code)]
34    pub fn get_id(&self, string: &[u8]) -> StringId {
35        let id = self.strings.get_index_of(string).unwrap();
36        StringId(id)
37    }
38
39    /// Return the string for the given id.
40    ///
41    /// Panics if the string is not in the string table.
42    #[allow(dead_code)]
43    pub fn get_string(&self, id: StringId) -> &'a [u8] {
44        self.strings.get_index(id.0).unwrap()
45    }
46
47    /// Return the offset of the given string.
48    ///
49    /// Panics if the string table has not been written, or
50    /// if the string is not in the string table.
51    pub fn get_offset(&self, id: StringId) -> usize {
52        self.offsets[id.0]
53    }
54
55    /// Append the string table to the given `Vec`, and
56    /// calculate the list of string offsets.
57    ///
58    /// `base` is the initial string table offset. For example,
59    /// this should be 1 for ELF, to account for the initial
60    /// null byte (which must have been written by the caller).
61    ///
62    /// Panics if the string table has already been written.
63    pub fn write(&mut self, base: usize, w: &mut Vec<u8>) {
64        assert!(self.offsets.is_empty());
65
66        let mut ids: Vec<_> = (0..self.strings.len()).collect();
67        sort(&mut ids, 1, &self.strings);
68
69        self.offsets = vec![0; ids.len()];
70        let mut offset = base;
71        let mut previous = &[][..];
72        for id in ids {
73            let string = self.strings.get_index(id).unwrap();
74            if previous.ends_with(string) {
75                self.offsets[id] = offset - string.len() - 1;
76            } else {
77                self.offsets[id] = offset;
78                w.extend_from_slice(string);
79                w.push(0);
80                offset += string.len() + 1;
81                previous = string;
82            }
83        }
84    }
85
86    /// Calculate the size in bytes of the string table.
87    ///
88    /// `base` is the initial string table offset. For example,
89    /// this should be 1 for ELF, to account for the initial
90    /// null byte.
91    #[allow(dead_code)]
92    pub fn size(&self, base: usize) -> usize {
93        // TODO: cache this result?
94        let mut ids: Vec<_> = (0..self.strings.len()).collect();
95        sort(&mut ids, 1, &self.strings);
96
97        let mut size = base;
98        let mut previous = &[][..];
99        for id in ids {
100            let string = self.strings.get_index(id).unwrap();
101            if !previous.ends_with(string) {
102                size += string.len() + 1;
103                previous = string;
104            }
105        }
106        size
107    }
108}
109
110// Multi-key quicksort.
111//
112// Ordering is such that if a string is a suffix of at least one other string,
113// then it is placed immediately after one of those strings. That is:
114// - comparison starts at the end of the string
115// - shorter strings come later
116//
117// Based on the implementation in LLVM.
118fn sort(mut ids: &mut [usize], mut pos: usize, strings: &IndexSet<&[u8]>) {
119    loop {
120        if ids.len() <= 1 {
121            return;
122        }
123
124        let pivot = byte(ids[0], pos, strings);
125        let mut lower = 0;
126        let mut upper = ids.len();
127        let mut i = 1;
128        while i < upper {
129            let b = byte(ids[i], pos, strings);
130            if b > pivot {
131                ids.swap(lower, i);
132                lower += 1;
133                i += 1;
134            } else if b < pivot {
135                upper -= 1;
136                ids.swap(upper, i);
137            } else {
138                i += 1;
139            }
140        }
141
142        sort(&mut ids[..lower], pos, strings);
143        sort(&mut ids[upper..], pos, strings);
144
145        if pivot == 0 {
146            return;
147        }
148        ids = &mut ids[lower..upper];
149        pos += 1;
150    }
151}
152
153fn byte(id: usize, pos: usize, strings: &IndexSet<&[u8]>) -> u8 {
154    let string = strings.get_index(id).unwrap();
155    let len = string.len();
156    if len >= pos {
157        string[len - pos]
158    } else {
159        // We know the strings don't contain null bytes.
160        0
161    }
162}
163
164#[cfg(test)]
165mod tests {
166    use super::*;
167
168    #[test]
169    fn string_table() {
170        let mut table = StringTable::default();
171        let id0 = table.add(b"");
172        let id1 = table.add(b"foo");
173        let id2 = table.add(b"bar");
174        let id3 = table.add(b"foobar");
175
176        let mut data = Vec::new();
177        data.push(0);
178        table.write(1, &mut data);
179        assert_eq!(data, b"\0foobar\0foo\0");
180
181        assert_eq!(table.get_offset(id0), 11);
182        assert_eq!(table.get_offset(id1), 8);
183        assert_eq!(table.get_offset(id2), 4);
184        assert_eq!(table.get_offset(id3), 1);
185    }
186}