protobuf/text_format/
mod.rs

1//! Protobuf "text format" implementation.
2//!
//! Text format messages look like this:
4//!
5//! ```text,ignore
6//! size: 17
7//! color: "red"
8//! children {
9//!     size: 18
10//!     color: "blue"
11//! }
12//! children {
13//!     size: 19
14//!     color: "green"
15//! }
16//! ```
17//!
//! This format is not specified, but it is implemented by all official
//! protobuf implementations, including the `protoc` command, which can decode
//! and encode messages using text format.
21
22use std;
23use std::fmt;
24use std::fmt::Write;
25
26use crate::message::Message;
27use crate::reflect::ReflectFieldRef;
28use crate::reflect::ReflectValueRef;
29
30mod print;
31
// Used by the text format parser and by the pure-rust codegen parser,
// so it is a public but hidden module.
34// https://github.com/rust-lang/rust/issues/44663
35#[doc(hidden)]
36pub mod lexer;
37
38use self::print::print_str_to;
39#[doc(hidden)]
40pub use self::print::quote_bytes_to;
41#[doc(hidden)]
42pub use self::print::quote_escape_bytes;
43use crate::text_format::print::quote_escape_bytes_to;
44
/// Decodes the backslash escapes of a text-format string body into raw bytes.
///
/// Recognized escapes:
///
/// * `\a`, `\b`, `\f`, `\n`, `\r`, `\t`, `\v`, `\"`, `\'` — the usual C escapes;
/// * `\o`, `\oo`, `\ooo` — one to three octal digits (`0`-`7`); a value above
///   `0o377` is truncated to its low 8 bits;
/// * `\xh`, `\xhh` — one or two hexadecimal digits;
/// * `\` followed by any other character — that character itself
///   (this is how `\\` yields a single backslash).
///
/// Characters outside of escapes are appended as their UTF-8 encoding, so the
/// output for a string without backslashes equals `string.as_bytes()`.
///
/// # Panics
///
/// Panics if `string` ends with a dangling backslash, or if `\x` is not
/// followed by at least one hexadecimal digit. The signature offers no way to
/// report malformed input, so callers are expected to pass validated literals.
#[doc(hidden)]
pub fn unescape_string(string: &str) -> Vec<u8> {
    /// Maximum number of digits in an octal escape, including the first one.
    const MAX_OCTAL_DIGITS: usize = 3;

    /// Consumes the next character and returns its value if it is a digit in
    /// `radix`; otherwise leaves `chars` untouched and returns `None`.
    fn take_digit(chars: &mut std::str::Chars, radix: u32) -> Option<u8> {
        let mut lookahead = chars.clone();
        let digit = lookahead.next()?.to_digit(radix)?;
        *chars = lookahead;
        // `to_digit` with radix <= 16 yields at most 15, so this never truncates.
        Some(digit as u8)
    }

    /// Appends the UTF-8 encoding of `c`.
    fn push_char(out: &mut Vec<u8>, c: char) {
        let mut utf8 = [0u8; 4];
        out.extend_from_slice(c.encode_utf8(&mut utf8).as_bytes());
    }

    /// Decodes one escape sequence; `chars` is positioned right after the `\`.
    fn unescape_into(chars: &mut std::str::Chars, out: &mut Vec<u8>) {
        let selector = chars.next().expect("incomplete escape sequence");
        let byte = match selector {
            'a' => b'\x07',
            'b' => b'\x08',
            'f' => b'\x0c',
            'n' => b'\n',
            'r' => b'\r',
            't' => b'\t',
            'v' => b'\x0b',
            '"' => b'"',
            '\'' => b'\'',
            '0'..='7' => {
                // Accumulate digit by digit so that short forms such as `\1`
                // decode to 1 rather than being scaled as if three digits
                // were present. Wrapping keeps the low 8 bits for values
                // above 0o377 instead of overflowing.
                let mut value = selector as u8 - b'0';
                for _ in 1..MAX_OCTAL_DIGITS {
                    match take_digit(chars, 8) {
                        Some(digit) => value = value.wrapping_mul(8).wrapping_add(digit),
                        None => break,
                    }
                }
                value
            }
            'x' => {
                let high = take_digit(chars, 16).expect("incorrect hex escape");
                match take_digit(chars, 16) {
                    // At most 15 * 16 + 15 == 255, so this cannot overflow.
                    Some(low) => high * 16 + low,
                    None => high,
                }
            }
            other => {
                // Unknown escape: keep the character itself, UTF-8 encoded.
                push_char(out, other);
                return;
            }
        };
        out.push(byte);
    }

    let mut chars = string.chars();
    // Decoding never produces more bytes than the input holds.
    let mut out = Vec::with_capacity(string.len());

    while let Some(c) = chars.next() {
        if c == '\\' {
            unescape_into(&mut chars, &mut out);
        } else {
            push_char(&mut out, c);
        }
    }

    out
}
113
/// Appends `indent` levels of two-space indentation to `buf`.
///
/// Nothing is written unless `pretty` is set.
fn do_indent(buf: &mut String, pretty: bool, indent: usize) {
    if !pretty {
        return;
    }
    buf.extend(std::iter::repeat("  ").take(indent));
}
121
122fn print_start_field(
123    buf: &mut String,
124    pretty: bool,
125    indent: usize,
126    first: &mut bool,
127    field_name: &str,
128) {
129    if !*first && !pretty {
130        buf.push_str(" ");
131    }
132    do_indent(buf, pretty, indent);
133    *first = false;
134    buf.push_str(field_name);
135}
136
/// Terminates a field: in pretty mode each field ends with a newline, in
/// compact mode nothing is written.
fn print_end_field(buf: &mut String, pretty: bool) {
    if !pretty {
        return;
    }
    buf.push('\n');
}
142
143fn print_field(
144    buf: &mut String,
145    pretty: bool,
146    indent: usize,
147    first: &mut bool,
148    field_name: &str,
149    value: ReflectValueRef,
150) {
151    print_start_field(buf, pretty, indent, first, field_name);
152
153    match value {
154        ReflectValueRef::Message(m) => {
155            buf.push_str(" {");
156            if pretty {
157                buf.push_str("\n");
158            }
159            print_to_internal(m, buf, pretty, indent + 1);
160            do_indent(buf, pretty, indent);
161            buf.push_str("}");
162        }
163        ReflectValueRef::Enum(e) => {
164            buf.push_str(": ");
165            buf.push_str(e.name());
166        }
167        ReflectValueRef::String(s) => {
168            buf.push_str(": ");
169            print_str_to(s, buf);
170        }
171        ReflectValueRef::Bytes(b) => {
172            buf.push_str(": ");
173            quote_escape_bytes_to(b, buf);
174        }
175        ReflectValueRef::I32(v) => {
176            write!(buf, ": {}", v).unwrap();
177        }
178        ReflectValueRef::I64(v) => {
179            write!(buf, ": {}", v).unwrap();
180        }
181        ReflectValueRef::U32(v) => {
182            write!(buf, ": {}", v).unwrap();
183        }
184        ReflectValueRef::U64(v) => {
185            write!(buf, ": {}", v).unwrap();
186        }
187        ReflectValueRef::Bool(v) => {
188            write!(buf, ": {}", v).unwrap();
189        }
190        ReflectValueRef::F32(v) => {
191            write!(buf, ": {}", v).unwrap();
192        }
193        ReflectValueRef::F64(v) => {
194            write!(buf, ": {}", v).unwrap();
195        }
196    }
197
198    print_end_field(buf, pretty);
199}
200
201fn print_to_internal(m: &dyn Message, buf: &mut String, pretty: bool, indent: usize) {
202    let d = m.descriptor();
203    let mut first = true;
204    for f in d.fields() {
205        match f.get_reflect(m) {
206            ReflectFieldRef::Map(map) => {
207                for (k, v) in map {
208                    print_start_field(buf, pretty, indent, &mut first, f.name());
209                    buf.push_str(" {");
210                    if pretty {
211                        buf.push_str("\n");
212                    }
213
214                    let mut entry_first = true;
215
216                    print_field(buf, pretty, indent + 1, &mut entry_first, "key", k.as_ref());
217                    print_field(
218                        buf,
219                        pretty,
220                        indent + 1,
221                        &mut entry_first,
222                        "value",
223                        v.as_ref(),
224                    );
225                    do_indent(buf, pretty, indent);
226                    buf.push_str("}");
227                    print_end_field(buf, pretty);
228                }
229            }
230            ReflectFieldRef::Repeated(repeated) => {
231                // TODO: do not print zeros for v3
232                for v in repeated {
233                    print_field(buf, pretty, indent, &mut first, f.name(), v.as_ref());
234                }
235            }
236            ReflectFieldRef::Optional(optional) => {
237                if let Some(v) = optional {
238                    print_field(buf, pretty, indent, &mut first, f.name(), v);
239                }
240            }
241        }
242    }
243
244    // TODO: unknown fields
245}
246
247/// Text-format
248pub fn print_to(m: &dyn Message, buf: &mut String) {
249    print_to_internal(m, buf, false, 0)
250}
251
252fn print_to_string_internal(m: &dyn Message, pretty: bool) -> String {
253    let mut r = String::new();
254    print_to_internal(m, &mut r, pretty, 0);
255    r.to_string()
256}
257
258/// Text-format
259pub fn print_to_string(m: &dyn Message) -> String {
260    print_to_string_internal(m, false)
261}
262
263/// Text-format to `fmt::Formatter`.
264pub fn fmt(m: &dyn Message, f: &mut fmt::Formatter) -> fmt::Result {
265    let pretty = f.alternate();
266    f.write_str(&print_to_string_internal(m, pretty))
267}
268
#[cfg(test)]
mod test {

    /// Runs `quote_bytes_to` on `data` and returns what it wrote.
    fn escape(data: &[u8]) -> String {
        let mut s = String::with_capacity(data.len() * 4);
        super::quote_bytes_to(data, &mut s);
        s
    }

    /// Checks both directions: `escaped` unescapes to `text`, and `text`
    /// escapes to `escaped`.
    fn test_escape_unescape(text: &str, escaped: &str) {
        assert_eq!(text.as_bytes(), &super::unescape_string(escaped)[..]);
        assert_eq!(escaped, &escape(text.as_bytes())[..]);
    }

    #[test]
    fn test_print_to_bytes() {
        assert_eq!("ab", escape(b"ab"));
        assert_eq!("a\\\\023", escape(b"a\\023"));
        assert_eq!("a\\r\\n\\t \\'\\\"\\\\", escape(b"a\r\n\t '\"\\"));
        assert_eq!("\\344\\275\\240\\345\\245\\275", escape("你好".as_bytes()));
    }

    #[test]
    fn test_unescape_string() {
        test_escape_unescape("", "");
        test_escape_unescape("aa", "aa");
        test_escape_unescape("\n", "\\n");
        test_escape_unescape("\r", "\\r");
        test_escape_unescape("\t", "\\t");
        test_escape_unescape("你好", "\\344\\275\\240\\345\\245\\275");
        // hex: cover both upper-case and lower-case digits
        assert_eq!(b"aaa\x01bbb", &super::unescape_string("aaa\\x01bbb")[..]);
        assert_eq!(b"aaa\xcdbbb", &super::unescape_string("aaa\\xCDbbb")[..]);
        assert_eq!(b"aaa\xcdbbb", &super::unescape_string("aaa\\xcdbbb")[..]);
        // quotes
        assert_eq!(b"aaa\"bbb", &super::unescape_string("aaa\\\"bbb")[..]);
        assert_eq!(b"aaa\'bbb", &super::unescape_string("aaa\\\'bbb")[..]);
    }
}