1#[cfg(feature = "alloc")]
28use alloc::borrow::Cow;
29#[cfg(feature = "alloc")]
30use alloc::string::String;
31#[cfg(feature = "alloc")]
32use alloc::vec::Vec;
33
34use super::in_inclusive_range16;
35use super::in_inclusive_range32;
36use super::in_inclusive_range8;
37use super::in_range16;
38use super::in_range32;
39use super::DecoderResult;
40use crate::ascii::*;
41use crate::utf_8::*;
42
// Forwards all of its arguments to `debug_assert!`, except when the crate is
// built with `--cfg fuzzing`, in which case the check is compiled out of the
// taken branch.
macro_rules! non_fuzz_debug_assert {
    ($($arg:tt)*) => {
        if cfg!(not(fuzzing)) {
            debug_assert!($($arg)*);
        }
    };
}
46
47cfg_if! {
48 if #[cfg(feature = "simd-accel")] {
49 use ::core::intrinsics::likely;
50 use ::core::intrinsics::unlikely;
51 } else {
52 #[inline(always)]
53 fn likely(b: bool) -> bool {
54 b
55 }
56 #[inline(always)]
57 fn unlikely(b: bool) -> bool {
58 b
59 }
60 }
61}
62
/// Classification of a buffer as returned by the
/// `check_*_for_latin1_and_bidi` functions in this module.
///
/// The type is a plain fieldless `#[repr(C)]` enum, so it is `Copy`: a result
/// can be stored, compared and passed by value without being moved out of the
/// caller's hands.
#[must_use]
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
#[repr(C)]
pub enum Latin1Bidi {
    /// Every code point in the input is below U+0100.
    Latin1 = 0,
    /// The input contains at least one code point at or above U+0100, but the
    /// bidi check on the remainder did not report right-to-left content.
    LeftToRight = 1,
    /// The input contains content the bidi check reported as right-to-left
    /// (for the UTF-8 slice variant this includes malformed input).
    Bidi = 2,
}
78
// Mask selecting the high byte of every 16-bit lane in a `usize`. A word of
// packed `u16` code units has no bit in common with this mask exactly when
// every unit is at most 0xFF.
//
// `!0usize / 0xFFFF` is 0x0001 repeated in every 16-bit lane of the native
// word, so the product is 0xFF00 repeated for the native pointer width.
#[allow(dead_code)]
const LATIN1_MASK: usize = (!0usize / 0xFFFF) * 0xFF00;
82
// Generates `fn $name(buffer: &[$unit]) -> bool` that returns `true` when no
// unit of `buffer` has a bit set that is selected by `$mask` (equivalently,
// for the instantiations in this file, when every unit is below `$bound`).
//
// Parameters:
// * `$name`  - name of the generated function.
// * `$unit`  - element type of the slice (`u8` or `u16` in this file).
// * `$bound` - first value that fails the check; used for the scalar tests.
// * `$mask`  - `usize` constant with the disqualifying bits of every unit set.
//
// The generated function reads the aligned middle of the buffer one `usize`
// at a time. NOTE(review): the word-sized reads assume `ALU_ALIGNMENT` equals
// `size_of::<usize>()` and `ALU_ALIGNMENT_MASK == ALU_ALIGNMENT - 1`; both
// constants are defined outside this chunk - confirm.
#[allow(unused_macros)]
macro_rules! by_unit_check_alu {
    ($name:ident, $unit:ty, $bound:expr, $mask:ident) => {
        #[cfg_attr(feature = "cargo-clippy", allow(cast_ptr_alignment))]
        #[inline(always)]
        fn $name(buffer: &[$unit]) -> bool {
            let mut offset = 0usize;
            // OR-accumulator of everything read through the scalar and
            // single-word paths; tested once at the end.
            let mut accu = 0usize;
            let unit_size = ::core::mem::size_of::<$unit>();
            let len = buffer.len();
            // Only bother with word reads when at least one full word fits.
            if len >= ALU_ALIGNMENT / unit_size {
                // Early exit when the very first unit already fails.
                if buffer[0] >= $bound {
                    return false;
                }
                let src = buffer.as_ptr();
                // Number of units before `src` reaches an ALU_ALIGNMENT boundary
                // (zero when already aligned).
                let mut until_alignment = ((ALU_ALIGNMENT - ((src as usize) & ALU_ALIGNMENT_MASK))
                    & ALU_ALIGNMENT_MASK)
                    / unit_size;
                // Require room for the unaligned prefix plus one whole word.
                if until_alignment + ALU_ALIGNMENT / unit_size <= len {
                    if until_alignment != 0 {
                        // Scalar pass over the unaligned prefix.
                        accu |= buffer[offset] as usize;
                        offset += 1;
                        until_alignment -= 1;
                        while until_alignment != 0 {
                            accu |= buffer[offset] as usize;
                            offset += 1;
                            until_alignment -= 1;
                        }
                        // `accu` holds only single units so far, so it can be
                        // compared against the bound directly.
                        if accu >= $bound {
                            return false;
                        }
                    }
                    // Last offset at which a full word can still be read.
                    let len_minus_stride = len - ALU_ALIGNMENT / unit_size;
                    // Unrolled path: taken only when four whole words fit.
                    if offset + (4 * (ALU_ALIGNMENT / unit_size)) <= len {
                        let len_minus_unroll = len - (4 * (ALU_ALIGNMENT / unit_size));
                        loop {
                            // The enclosing check (first iteration) and the
                            // `len_minus_unroll` check below (later iterations)
                            // keep all four word reads inside `buffer`.
                            let unroll_accu = unsafe { *(src.add(offset) as *const usize) }
                                | unsafe {
                                    *(src.add(offset + (ALU_ALIGNMENT / unit_size)) as *const usize)
                                }
                                | unsafe {
                                    *(src.add(offset + (2 * (ALU_ALIGNMENT / unit_size)))
                                        as *const usize)
                                }
                                | unsafe {
                                    *(src.add(offset + (3 * (ALU_ALIGNMENT / unit_size)))
                                        as *const usize)
                                };
                            if unroll_accu & $mask != 0 {
                                return false;
                            }
                            offset += 4 * (ALU_ALIGNMENT / unit_size);
                            if offset > len_minus_unroll {
                                break;
                            }
                        }
                    }
                    // Remaining whole words, one at a time; result is checked
                    // after the scalar tail.
                    while offset <= len_minus_stride {
                        accu |= unsafe { *(src.add(offset) as *const usize) };
                        offset += ALU_ALIGNMENT / unit_size;
                    }
                }
            }
            // Scalar tail (or the whole buffer when it was too short above).
            for &unit in &buffer[offset..] {
                accu |= unit as usize;
            }
            accu & $mask == 0
        }
    };
}
162
// Generates `fn $name(buffer: &[$unit]) -> bool` that returns `true` when
// every unit of `buffer` passes the check expressed by `$bound` / `$func`.
//
// Parameters:
// * `$name`    - name of the generated function.
// * `$unit`    - element type of the slice.
// * `$splat`   - expression producing the initial (all-zero in this file)
//                SIMD accumulator.
// * `$simd_ty` - SIMD vector type used for the aligned reads.
// * `$bound`   - first scalar value that fails the check.
// * `$func`    - function taking a `$simd_ty` and returning whether the
//                OR-combined vector still passes.
//
// NOTE(review): the vector reads assume `size_of::<$simd_ty>()` equals
// `SIMD_STRIDE_SIZE` and that `SIMD_ALIGNMENT` is a sufficient alignment for
// `$simd_ty`; `SIMD_STRIDE_SIZE` is defined outside this chunk - confirm.
#[allow(unused_macros)]
macro_rules! by_unit_check_simd {
    ($name:ident, $unit:ty, $splat:expr, $simd_ty:ty, $bound:expr, $func:ident) => {
        #[inline(always)]
        fn $name(buffer: &[$unit]) -> bool {
            let mut offset = 0usize;
            // OR-accumulator for units read one at a time (prefix and tail).
            let mut accu = 0usize;
            let unit_size = ::core::mem::size_of::<$unit>();
            let len = buffer.len();
            // Only use vector reads when at least one full stride fits.
            if len >= SIMD_STRIDE_SIZE / unit_size {
                // Early exit when the very first unit already fails.
                if buffer[0] >= $bound {
                    return false;
                }
                let src = buffer.as_ptr();
                // Number of units before `src` reaches a SIMD_ALIGNMENT
                // boundary (zero when already aligned).
                let mut until_alignment = ((SIMD_ALIGNMENT
                    - ((src as usize) & SIMD_ALIGNMENT_MASK))
                    & SIMD_ALIGNMENT_MASK)
                    / unit_size;
                // Require room for the unaligned prefix plus one whole stride.
                if until_alignment + SIMD_STRIDE_SIZE / unit_size <= len {
                    if until_alignment != 0 {
                        // Scalar pass over the unaligned prefix.
                        accu |= buffer[offset] as usize;
                        offset += 1;
                        until_alignment -= 1;
                        while until_alignment != 0 {
                            accu |= buffer[offset] as usize;
                            offset += 1;
                            until_alignment -= 1;
                        }
                        if accu >= $bound {
                            return false;
                        }
                    }
                    // Last offset at which a full stride can still be read.
                    let len_minus_stride = len - SIMD_STRIDE_SIZE / unit_size;
                    // Unrolled path: taken only when four whole strides fit.
                    if offset + (4 * (SIMD_STRIDE_SIZE / unit_size)) <= len {
                        let len_minus_unroll = len - (4 * (SIMD_STRIDE_SIZE / unit_size));
                        loop {
                            // The enclosing check (first iteration) and the
                            // `len_minus_unroll` check below (later iterations)
                            // keep all four vector reads inside `buffer`.
                            let unroll_accu = unsafe { *(src.add(offset) as *const $simd_ty) }
                                | unsafe {
                                    *(src.add(offset + (SIMD_STRIDE_SIZE / unit_size))
                                        as *const $simd_ty)
                                }
                                | unsafe {
                                    *(src.add(offset + (2 * (SIMD_STRIDE_SIZE / unit_size)))
                                        as *const $simd_ty)
                                }
                                | unsafe {
                                    *(src.add(offset + (3 * (SIMD_STRIDE_SIZE / unit_size)))
                                        as *const $simd_ty)
                                };
                            if !$func(unroll_accu) {
                                return false;
                            }
                            offset += 4 * (SIMD_STRIDE_SIZE / unit_size);
                            if offset > len_minus_unroll {
                                break;
                            }
                        }
                    }
                    // Remaining whole strides are OR-ed into one vector and
                    // tested once after the loop.
                    let mut simd_accu = $splat;
                    while offset <= len_minus_stride {
                        simd_accu = simd_accu | unsafe { *(src.add(offset) as *const $simd_ty) };
                        offset += SIMD_STRIDE_SIZE / unit_size;
                    }
                    if !$func(simd_accu) {
                        return false;
                    }
                }
            }
            // Scalar tail (or the whole buffer when it was too short above).
            for &unit in &buffer[offset..] {
                accu |= unit as usize;
            }
            accu < $bound
        }
    };
}
247
// Selects between SIMD and ALU implementations of `is_ascii_impl`,
// `is_basic_latin_impl`, `is_utf16_latin1_impl` and `utf16_valid_up_to_impl`.
cfg_if! {
    if #[cfg(all(feature = "simd-accel", any(target_feature = "sse2", all(target_endian = "little", target_arch = "aarch64"), all(target_endian = "little", target_feature = "neon"))))] {
        use crate::simd_funcs::*;
        use core::simd::u8x16;
        use core::simd::u16x8;

        // Alignment, in bytes, that the SIMD reads in this file are advanced to.
        const SIMD_ALIGNMENT: usize = 16;

        // `SIMD_ALIGNMENT - 1`; masks the misaligned low bits of an address.
        const SIMD_ALIGNMENT_MASK: usize = 15;

        by_unit_check_simd!(is_ascii_impl, u8, u8x16::splat(0), u8x16, 0x80, simd_is_ascii);
        by_unit_check_simd!(is_basic_latin_impl, u16, u16x8::splat(0), u16x8, 0x80, simd_is_basic_latin);
        by_unit_check_simd!(is_utf16_latin1_impl, u16, u16x8::splat(0), u16x8, 0x100, simd_is_latin1);

        // Returns the index of the first unpaired surrogate in `buffer`, or
        // `buffer.len()` if there is none.
        //
        // Strides without surrogates are skipped with aligned SIMD reads;
        // everything else is delegated to `utf16_valid_up_to_alu`. The extra
        // "plus one" unit passed to the ALU helper lets it see a low surrogate
        // that completes a pair whose high surrogate is the last unit of the
        // range under inspection.
        #[inline(always)]
        fn utf16_valid_up_to_impl(buffer: &[u16]) -> usize {
            let unit_size = ::core::mem::size_of::<u16>();
            let src = buffer.as_ptr();
            let len = buffer.len();
            let mut offset = 0usize;
            'outer: loop {
                // Units between the current position and the next aligned address.
                let until_alignment = ((SIMD_ALIGNMENT - ((unsafe { src.add(offset) } as usize) & SIMD_ALIGNMENT_MASK)) &
                    SIMD_ALIGNMENT_MASK) / unit_size;
                if until_alignment == 0 {
                    // Already aligned: leave for the ALU tail if a whole
                    // stride no longer fits.
                    if offset + SIMD_STRIDE_SIZE / unit_size > len {
                        break;
                    }
                } else {
                    let offset_plus_until_alignment = offset + until_alignment;
                    let offset_plus_until_alignment_plus_one = offset_plus_until_alignment + 1;
                    // Need the prefix, one look-ahead unit and a full stride.
                    if offset_plus_until_alignment_plus_one + SIMD_STRIDE_SIZE / unit_size > len {
                        break;
                    }
                    let (up_to, last_valid_low) = utf16_valid_up_to_alu(&buffer[offset..offset_plus_until_alignment_plus_one]);
                    // An error inside the prefix proper is final.
                    if up_to < until_alignment {
                        return offset + up_to;
                    }
                    if last_valid_low {
                        // The look-ahead unit was consumed as the low half of
                        // a pair, so the position is misaligned again:
                        // recompute the alignment from the top.
                        offset = offset_plus_until_alignment_plus_one;
                        continue;
                    }
                    offset = offset_plus_until_alignment;
                }
                let len_minus_stride = len - SIMD_STRIDE_SIZE / unit_size;
                loop {
                    let offset_plus_stride = offset + SIMD_STRIDE_SIZE / unit_size;
                    // Both paths into this loop have established that a full
                    // stride starting at `offset` lies inside `buffer`.
                    if contains_surrogates(unsafe { *(src.add(offset) as *const u16x8) }) {
                        // No look-ahead unit is available at the very end;
                        // let the ALU tail handle this stride.
                        if offset_plus_stride == len {
                            break 'outer;
                        }
                        let offset_plus_stride_plus_one = offset_plus_stride + 1;
                        let (up_to, last_valid_low) = utf16_valid_up_to_alu(&buffer[offset..offset_plus_stride_plus_one]);
                        if up_to < SIMD_STRIDE_SIZE / unit_size {
                            return offset + up_to;
                        }
                        if last_valid_low {
                            // Pair straddled the stride boundary; realign.
                            offset = offset_plus_stride_plus_one;
                            continue 'outer;
                        }
                    }
                    offset = offset_plus_stride;
                    if offset > len_minus_stride {
                        break 'outer;
                    }
                }
            }
            // Whatever is left is shorter than a stride (plus look-ahead).
            let (up_to, _) = utf16_valid_up_to_alu(&buffer[offset..]);
            offset + up_to
        }
    } else {
        by_unit_check_alu!(is_ascii_impl, u8, 0x80, ASCII_MASK);
        by_unit_check_alu!(is_basic_latin_impl, u16, 0x80, BASIC_LATIN_MASK);
        by_unit_check_alu!(is_utf16_latin1_impl, u16, 0x100, LATIN1_MASK);

        // Returns the index of the first unpaired surrogate in `buffer`, or
        // `buffer.len()` if there is none, using the scalar helper only.
        #[inline(always)]
        fn utf16_valid_up_to_impl(buffer: &[u16]) -> usize {
            let (up_to, _) = utf16_valid_up_to_alu(buffer);
            up_to
        }
    }
}
333
/// Scans `buffer` for the first unpaired surrogate.
///
/// Returns `(index, ended_with_pair)`:
/// * `index` is the position of the first unpaired surrogate, or
///   `buffer.len()` when the whole buffer is valid UTF-16.
/// * `ended_with_pair` is `true` only when the whole buffer was valid and its
///   final two code units formed a surrogate pair.
#[cfg_attr(feature = "cargo-clippy", allow(collapsible_if))]
#[inline(always)]
fn utf16_valid_up_to_alu(buffer: &[u16]) -> (usize, bool) {
    let total = buffer.len();
    let mut pos = 0usize;
    // Tracks whether the most recently consumed code units were a pair.
    let mut ended_with_pair = false;
    while pos < total {
        match buffer[pos] {
            // High surrogate: valid only when directly followed by a low one.
            0xD800..=0xDBFF => {
                let has_low_partner = match buffer.get(pos + 1) {
                    Some(&trail) => trail >= 0xDC00 && trail <= 0xDFFF,
                    None => false,
                };
                if !has_low_partner {
                    return (pos, false);
                }
                pos += 2;
                ended_with_pair = true;
            }
            // Low surrogate without a preceding high surrogate.
            0xDC00..=0xDFFF => return (pos, false),
            // Any other code unit stands on its own.
            _ => {
                pos += 1;
                ended_with_pair = false;
            }
        }
    }
    (pos, ended_with_pair)
}
379
// Selects between SIMD and scalar implementations of `is_str_latin1_impl`.
//
// Both variants return `None` when every code point of `buffer` is below
// U+0100, and otherwise `Some(index)` of the lead byte of the first code
// point at or above U+0100. Both rely on `buffer` being valid UTF-8 (it is a
// `&str`): a lead byte above 0xC3 starts a code point at or above U+0100.
cfg_if! {
    if #[cfg(all(feature = "simd-accel", any(target_feature = "sse2", all(target_endian = "little", target_arch = "aarch64"), all(target_endian = "little", target_feature = "neon"))))] {
        #[inline(always)]
        fn is_str_latin1_impl(buffer: &str) -> Option<usize> {
            let mut offset = 0usize;
            let bytes = buffer.as_bytes();
            let len = bytes.len();
            if len >= SIMD_STRIDE_SIZE {
                let src = bytes.as_ptr();
                // Bytes before `src` reaches a SIMD_ALIGNMENT boundary.
                let mut until_alignment = (SIMD_ALIGNMENT - ((src as usize) & SIMD_ALIGNMENT_MASK)) &
                    SIMD_ALIGNMENT_MASK;
                if until_alignment + SIMD_STRIDE_SIZE <= len {
                    // Scalar pass over the unaligned prefix.
                    while until_alignment != 0 {
                        if bytes[offset] > 0xC3 {
                            return Some(offset);
                        }
                        offset += 1;
                        until_alignment -= 1;
                    }
                    let len_minus_stride = len - SIMD_STRIDE_SIZE;
                    loop {
                        // In bounds: `offset + SIMD_STRIDE_SIZE <= len` holds
                        // on entry and is re-checked before each repetition.
                        if !simd_is_str_latin1(unsafe { *(src.add(offset) as *const u8x16) }) {
                            // The stride may start in the middle of a code
                            // point; skip continuation bytes so the returned
                            // index is on a character boundary.
                            while bytes[offset] & 0xC0 == 0x80 {
                                offset += 1;
                            }
                            return Some(offset);
                        }
                        offset += SIMD_STRIDE_SIZE;
                        if offset > len_minus_stride {
                            break;
                        }
                    }
                }
            }
            // Scalar tail (or the whole buffer when it was too short above).
            for i in offset..len {
                if bytes[i] > 0xC3 {
                    return Some(i);
                }
            }
            None
        }
    } else {
        #[inline(always)]
        fn is_str_latin1_impl(buffer: &str) -> Option<usize> {
            let mut bytes = buffer.as_bytes();
            // Number of bytes of `buffer` already accepted.
            let mut total = 0;
            loop {
                // `validate_ascii` is defined elsewhere; from its use here it
                // yields the first non-ASCII byte and its index, or `None`
                // when the slice is all ASCII.
                if let Some((byte, offset)) = validate_ascii(bytes) {
                    total += offset;
                    if byte > 0xC3 {
                        return Some(total);
                    }
                    // Lead byte of at most 0xC3 in valid UTF-8: a two-byte
                    // sequence below U+0100, so skip lead and trail.
                    bytes = &bytes[offset + 2..];
                    total += 2;
                } else {
                    return None;
                }
            }
        }
    }
}
442
443#[inline(always)]
444fn is_utf8_latin1_impl(buffer: &[u8]) -> Option<usize> {
445 let mut bytes = buffer;
446 let mut total = 0;
447 loop {
448 if let Some((byte, offset)) = validate_ascii(bytes) {
449 total += offset;
450 if in_inclusive_range8(byte, 0xC2, 0xC3) {
451 let next = offset + 1;
452 if next == bytes.len() {
453 return Some(total);
454 }
455 if bytes[next] & 0xC0 != 0x80 {
456 return Some(total);
457 }
458 bytes = &bytes[offset + 2..];
459 total += 2;
460 } else {
461 return Some(total);
462 }
463 } else {
464 return None;
465 }
466 }
467}
468
469cfg_if! {
470 if #[cfg(all(feature = "simd-accel", any(target_feature = "sse2", all(target_endian = "little", target_arch = "aarch64"), all(target_endian = "little", target_feature = "neon"))))] {
471 #[inline(always)]
472 fn is_utf16_bidi_impl(buffer: &[u16]) -> bool {
473 let mut offset = 0usize;
474 let len = buffer.len();
475 if len >= SIMD_STRIDE_SIZE / 2 {
476 let src = buffer.as_ptr();
477 let mut until_alignment = ((SIMD_ALIGNMENT - ((src as usize) & SIMD_ALIGNMENT_MASK)) &
478 SIMD_ALIGNMENT_MASK) / 2;
479 if until_alignment + (SIMD_STRIDE_SIZE / 2) <= len {
480 while until_alignment != 0 {
481 if is_utf16_code_unit_bidi(buffer[offset]) {
482 return true;
483 }
484 offset += 1;
485 until_alignment -= 1;
486 }
487 let len_minus_stride = len - (SIMD_STRIDE_SIZE / 2);
488 loop {
489 if is_u16x8_bidi(unsafe { *(src.add(offset) as *const u16x8) }) {
490 return true;
491 }
492 offset += SIMD_STRIDE_SIZE / 2;
493 if offset > len_minus_stride {
494 break;
495 }
496 }
497 }
498 }
499 for &u in &buffer[offset..] {
500 if is_utf16_code_unit_bidi(u) {
501 return true;
502 }
503 }
504 false
505 }
506 } else {
507 #[inline(always)]
508 fn is_utf16_bidi_impl(buffer: &[u16]) -> bool {
509 for &u in buffer {
510 if is_utf16_code_unit_bidi(u) {
511 return true;
512 }
513 }
514 false
515 }
516 }
517}
518
// Selects between SIMD and ALU implementations of
// `check_utf16_for_latin1_and_bidi_impl`.
//
// Both variants return:
// * `Latin1Bidi::Latin1` when no code unit is above 0xFF;
// * `Latin1Bidi::Bidi` when, from the first block/unit found to contain a
//   value above 0xFF onwards, some code unit is flagged by the bidi checks;
// * `Latin1Bidi::LeftToRight` otherwise.
//
// Code units before the first one above 0xFF are never bidi-checked; that is
// sound because `is_utf16_code_unit_bidi` rejects everything below 0x0590.
cfg_if! {
    if #[cfg(all(feature = "simd-accel", any(target_feature = "sse2", all(target_endian = "little", target_arch = "aarch64"), all(target_endian = "little", target_feature = "neon"))))] {
        #[inline(always)]
        fn check_utf16_for_latin1_and_bidi_impl(buffer: &[u16]) -> Latin1Bidi {
            let mut offset = 0usize;
            let len = buffer.len();
            if len >= SIMD_STRIDE_SIZE / 2 {
                let src = buffer.as_ptr();
                // Code units before `src` reaches a SIMD_ALIGNMENT boundary.
                let mut until_alignment = ((SIMD_ALIGNMENT - ((src as usize) & SIMD_ALIGNMENT_MASK)) &
                    SIMD_ALIGNMENT_MASK) / 2;
                if until_alignment + (SIMD_STRIDE_SIZE / 2) <= len {
                    // Scalar pass over the unaligned prefix.
                    while until_alignment != 0 {
                        if buffer[offset] > 0xFF {
                            // Not Latin1 any more: the answer now depends only
                            // on whether the rest contains bidi code units.
                            if is_utf16_bidi_impl(&buffer[offset..]) {
                                return Latin1Bidi::Bidi;
                            }
                            return Latin1Bidi::LeftToRight;
                        }
                        offset += 1;
                        until_alignment -= 1;
                    }
                    let len_minus_stride = len - (SIMD_STRIDE_SIZE / 2);
                    loop {
                        // In bounds: a full stride starting at `offset` fits.
                        let mut s = unsafe { *(src.add(offset) as *const u16x8) };
                        if !simd_is_latin1(s) {
                            // Switch to bidi-only scanning, starting with the
                            // vector that was just loaded.
                            loop {
                                if is_u16x8_bidi(s) {
                                    return Latin1Bidi::Bidi;
                                }
                                offset += SIMD_STRIDE_SIZE / 2;
                                if offset > len_minus_stride {
                                    // Fewer than a stride left: finish with
                                    // scalar bidi checks.
                                    for &u in &buffer[offset..] {
                                        if is_utf16_code_unit_bidi(u) {
                                            return Latin1Bidi::Bidi;
                                        }
                                    }
                                    return Latin1Bidi::LeftToRight;
                                }
                                s = unsafe { *(src.add(offset) as *const u16x8) };
                            }
                        }
                        offset += SIMD_STRIDE_SIZE / 2;
                        if offset > len_minus_stride {
                            break;
                        }
                    }
                }
            }
            // Scalar tail (or the whole buffer when it was too short above).
            let mut iter = (&buffer[offset..]).iter();
            loop {
                if let Some(&u) = iter.next() {
                    if u > 0xFF {
                        // First non-Latin1 unit: bidi-check it and everything
                        // after it.
                        let mut inner_u = u;
                        loop {
                            if is_utf16_code_unit_bidi(inner_u) {
                                return Latin1Bidi::Bidi;
                            }
                            if let Some(&code_unit) = iter.next() {
                                inner_u = code_unit;
                            } else {
                                return Latin1Bidi::LeftToRight;
                            }
                        }
                    }
                } else {
                    return Latin1Bidi::Latin1;
                }
            }
        }
    } else {
        #[cfg_attr(feature = "cargo-clippy", allow(cast_ptr_alignment))]
        #[inline(always)]
        fn check_utf16_for_latin1_and_bidi_impl(buffer: &[u16]) -> Latin1Bidi {
            let mut offset = 0usize;
            let len = buffer.len();
            // NOTE(review): the word reads below assume `ALU_ALIGNMENT` is
            // `size_of::<usize>()`; it is defined outside this chunk - confirm.
            if len >= ALU_ALIGNMENT / 2 {
                let src = buffer.as_ptr();
                // Code units before `src` reaches an ALU_ALIGNMENT boundary.
                let mut until_alignment = ((ALU_ALIGNMENT - ((src as usize) & ALU_ALIGNMENT_MASK)) &
                    ALU_ALIGNMENT_MASK) / 2;
                if until_alignment + ALU_ALIGNMENT / 2 <= len {
                    // Scalar pass over the unaligned prefix.
                    while until_alignment != 0 {
                        if buffer[offset] > 0xFF {
                            if is_utf16_bidi_impl(&buffer[offset..]) {
                                return Latin1Bidi::Bidi;
                            }
                            return Latin1Bidi::LeftToRight;
                        }
                        offset += 1;
                        until_alignment -= 1;
                    }
                    let len_minus_stride = len - ALU_ALIGNMENT / 2;
                    loop {
                        // One aligned word of packed code units; any bit under
                        // LATIN1_MASK means some unit in it is above 0xFF.
                        if unsafe { *(src.add(offset) as *const usize) } & LATIN1_MASK != 0 {
                            if is_utf16_bidi_impl(&buffer[offset..]) {
                                return Latin1Bidi::Bidi;
                            }
                            return Latin1Bidi::LeftToRight;
                        }
                        offset += ALU_ALIGNMENT / 2;
                        if offset > len_minus_stride {
                            break;
                        }
                    }
                }
            }
            // Scalar tail (or the whole buffer when it was too short above).
            let mut iter = (&buffer[offset..]).iter();
            loop {
                if let Some(&u) = iter.next() {
                    if u > 0xFF {
                        // First non-Latin1 unit: bidi-check it and everything
                        // after it.
                        let mut inner_u = u;
                        loop {
                            if is_utf16_code_unit_bidi(inner_u) {
                                return Latin1Bidi::Bidi;
                            }
                            if let Some(&code_unit) = iter.next() {
                                inner_u = code_unit;
                            } else {
                                return Latin1Bidi::LeftToRight;
                            }
                        }
                    }
                } else {
                    return Latin1Bidi::Latin1;
                }
            }
        }
    }
}
649
/// Checks whether `buffer` consists solely of ASCII bytes.
///
/// Delegates to `is_ascii_impl`, which this file instantiates with a bound
/// of 0x80, so the result is `true` exactly when every byte is below 0x80
/// (including for an empty buffer).
pub fn is_ascii(buffer: &[u8]) -> bool {
    is_ascii_impl(buffer)
}
657
/// Checks whether every UTF-16 code unit in `buffer` is in the Basic Latin
/// (ASCII) range.
///
/// Delegates to `is_basic_latin_impl`, which this file instantiates with a
/// bound of 0x80, so the result is `true` exactly when every code unit is
/// below 0x80 (including for an empty buffer).
pub fn is_basic_latin(buffer: &[u16]) -> bool {
    is_basic_latin_impl(buffer)
}
666
/// Checks whether a potentially invalid UTF-8 buffer is valid UTF-8 whose
/// code points are all below U+0100.
///
/// Returns `true` when `is_utf8_latin1_impl` finds no offending position,
/// i.e. the buffer contains only ASCII bytes and complete two-byte sequences
/// led by 0xC2 or 0xC3. Any other byte, including malformed input, yields
/// `false`.
pub fn is_utf8_latin1(buffer: &[u8]) -> bool {
    is_utf8_latin1_impl(buffer).is_none()
}
675
/// Checks whether every code point of `buffer` is below U+0100.
///
/// Returns `true` when `is_str_latin1_impl` finds no lead byte above 0xC3.
/// Because the input is a `&str` (valid UTF-8), that is exactly when no code
/// point is at or above U+0100.
pub fn is_str_latin1(buffer: &str) -> bool {
    is_str_latin1_impl(buffer).is_none()
}
684
/// Checks whether every UTF-16 code unit in `buffer` is below U+0100.
///
/// Delegates to `is_utf16_latin1_impl`, which this file instantiates with a
/// bound of 0x100, so the result is `true` exactly when every code unit is
/// at most 0xFF (including for an empty buffer).
pub fn is_utf16_latin1(buffer: &[u16]) -> bool {
    is_utf16_latin1_impl(buffer)
}
693
/// Checks whether a potentially invalid UTF-8 buffer contains right-to-left
/// content, treating malformed UTF-8 as right-to-left.
///
/// Returns `true` as soon as either
/// * a sequence falls in one of the byte ranges tested below, which
///   correspond to the code point ranges accepted by `is_char_bidi`
///   (U+0590..=U+08FF, the four RTL controls U+200F/U+202B/U+202E/U+2067,
///   U+FB1D..=U+FDFF, U+FE70..=U+FEFE, U+10800..=U+10FFF,
///   U+1E800..=U+1EFFF), or
/// * any of the validity checks fails (bad lead byte, bad continuation byte,
///   or a sequence truncated by the end of the buffer).
///
/// Returns `false` when the whole buffer was scanned without either.
///
/// Byte-level boundaries used by the checks:
///
/// ```text
/// U+0590:  D6 90          U+08FF:  E0 A3 BF
/// U+200F:  E2 80 8F       U+202B:  E2 80 AB
/// U+202E:  E2 80 AE       U+2067:  E2 81 A7
/// U+FB1D:  EF AC 9D       U+FDFF:  EF B7 BF
/// U+FE70:  EF B9 B0       U+FEFE:  EF BB BE
/// U+10800: F0 90 A0 80    U+10FFF: F0 90 BF BF
/// U+1E800: F0 9E A0 80    U+1EFFF: F0 9E BF BF
/// ```
#[cfg_attr(feature = "cargo-clippy", allow(collapsible_if, cyclomatic_complexity))]
#[inline]
pub fn is_utf8_bidi(buffer: &[u8]) -> bool {
    let mut src = buffer;
    'outer: loop {
        // `validate_ascii` is defined elsewhere; from its use here it yields
        // the first non-ASCII byte and its index, or `None` for all-ASCII.
        if let Some((mut byte, mut read)) = validate_ascii(src) {
            // Fast path: while at least four bytes remain from `read`, even
            // the longest sequence can be read without per-byte length
            // checks, which is what justifies the `get_unchecked` calls.
            if read + 4 <= src.len() {
                'inner: loop {
                    // Invariant: `byte == src[read]`, i.e. `read` has not yet
                    // been advanced past `byte`.
                    match byte {
                        0..=0x7F => {
                            // ASCII: hand the rest back to `validate_ascii`.
                            read += 1;
                            src = &src[read..];
                            continue 'outer;
                        }
                        0xC2..=0xD5 => {
                            // Two-byte sequence below U+0580: never RTL, only
                            // the continuation byte needs validating.
                            let second = unsafe { *(src.get_unchecked(read + 1)) };
                            if !in_inclusive_range8(second, 0x80, 0xBF) {
                                return true;
                            }
                            read += 2;
                        }
                        0xD6 => {
                            // Two-byte sequence U+0580..=U+05BF.
                            let second = unsafe { *(src.get_unchecked(read + 1)) };
                            if !in_inclusive_range8(second, 0x80, 0xBF) {
                                return true;
                            }
                            // D6 90 is U+0590, the start of the RTL range.
                            if second > 0x8F {
                                return true;
                            }
                            read += 2;
                        }
                        // Leads 0xD7..=0xDF are not listed: they fall through
                        // to the catch-all arm and count as RTL.
                        0xE1 | 0xE3..=0xEC | 0xEE => {
                            // Three-byte sequence with no RTL content:
                            // validity check only.
                            let second = unsafe { *(src.get_unchecked(read + 1)) };
                            let third = unsafe { *(src.get_unchecked(read + 2)) };
                            // `third >> 6 == 2` holds exactly for continuation
                            // bytes. NOTE(review): the two `UTF8_DATA.table`
                            // lookups presumably AND to zero when `second` is
                            // a permitted second byte for this lead - the
                            // table is defined elsewhere, confirm.
                            if ((UTF8_DATA.table[usize::from(second)]
                                & unsafe {
                                    *(UTF8_DATA.table.get_unchecked(byte as usize + 0x80))
                                })
                                | (third >> 6))
                                != 2
                            {
                                return true;
                            }
                            read += 3;
                        }
                        0xE2 => {
                            // Three-byte sequence U+2000..=U+2FFF: contains
                            // the four RTL control characters.
                            let second = unsafe { *(src.get_unchecked(read + 1)) };
                            let third = unsafe { *(src.get_unchecked(read + 2)) };
                            if ((UTF8_DATA.table[usize::from(second)]
                                & unsafe {
                                    *(UTF8_DATA.table.get_unchecked(byte as usize + 0x80))
                                })
                                | (third >> 6))
                                != 2
                            {
                                return true;
                            }
                            if second == 0x80 {
                                // U+200F, U+202B, U+202E.
                                if third == 0x8F || third == 0xAB || third == 0xAE {
                                    return true;
                                }
                            } else if second == 0x81 {
                                // U+2067.
                                if third == 0xA7 {
                                    return true;
                                }
                            }
                            read += 3;
                        }
                        0xEF => {
                            // Three-byte sequence U+F000..=U+FFFF: contains
                            // the RTL presentation-form ranges.
                            let second = unsafe { *(src.get_unchecked(read + 1)) };
                            let third = unsafe { *(src.get_unchecked(read + 2)) };
                            if ((UTF8_DATA.table[usize::from(second)]
                                & unsafe {
                                    *(UTF8_DATA.table.get_unchecked(byte as usize + 0x80))
                                })
                                | (third >> 6))
                                != 2
                            {
                                return true;
                            }
                            if in_inclusive_range8(second, 0xAC, 0xB7) {
                                // U+FB00..=U+FDFF; RTL from U+FB1D (EF AC 9D).
                                if second == 0xAC {
                                    if third > 0x9C {
                                        return true;
                                    }
                                } else {
                                    return true;
                                }
                            } else if in_inclusive_range8(second, 0xB9, 0xBB) {
                                // U+FE40..=U+FEFF; RTL is U+FE70..=U+FEFE.
                                if second == 0xB9 {
                                    // EF B9 B0 is U+FE70.
                                    if third > 0xAF {
                                        return true;
                                    }
                                } else if second == 0xBB {
                                    // EF BB BF is U+FEFF, the only non-RTL
                                    // code point with this second byte.
                                    if third != 0xBF {
                                        return true;
                                    }
                                } else {
                                    return true;
                                }
                            }
                            read += 3;
                        }
                        0xE0 => {
                            // Three-byte sequence U+0800..=U+0FFF.
                            let second = unsafe { *(src.get_unchecked(read + 1)) };
                            let third = unsafe { *(src.get_unchecked(read + 2)) };
                            if ((UTF8_DATA.table[usize::from(second)]
                                & unsafe {
                                    *(UTF8_DATA.table.get_unchecked(byte as usize + 0x80))
                                })
                                | (third >> 6))
                                != 2
                            {
                                return true;
                            }
                            // E0 A4 80 is U+0900; everything below is RTL.
                            if second < 0xA4 {
                                return true;
                            }
                            read += 3;
                        }
                        0xED => {
                            // Three-byte sequence led by 0xED: validity check
                            // only (the table lookup is relied upon to reject
                            // whatever second bytes are not permitted).
                            let second = unsafe { *(src.get_unchecked(read + 1)) };
                            let third = unsafe { *(src.get_unchecked(read + 2)) };
                            if ((UTF8_DATA.table[usize::from(second)]
                                & unsafe {
                                    *(UTF8_DATA.table.get_unchecked(byte as usize + 0x80))
                                })
                                | (third >> 6))
                                != 2
                            {
                                return true;
                            }
                            read += 3;
                        }
                        0xF1..=0xF4 => {
                            // Four-byte sequence at or above U+40000: no RTL
                            // content, validity check only.
                            let second = unsafe { *(src.get_unchecked(read + 1)) };
                            let third = unsafe { *(src.get_unchecked(read + 2)) };
                            let fourth = unsafe { *(src.get_unchecked(read + 3)) };
                            // 0x202 requires `third >> 6 == 2` and
                            // `fourth & 0xC0 == 0x80` (both continuation
                            // bytes) with no other bits from the table part.
                            if (u16::from(
                                UTF8_DATA.table[usize::from(second)]
                                    & unsafe {
                                        *(UTF8_DATA.table.get_unchecked(byte as usize + 0x80))
                                    },
                            ) | u16::from(third >> 6)
                                | (u16::from(fourth & 0xC0) << 2))
                                != 0x202
                            {
                                return true;
                            }
                            read += 4;
                        }
                        0xF0 => {
                            // Four-byte sequence U+10000..=U+3FFFF: contains
                            // both supplementary RTL ranges.
                            let second = unsafe { *(src.get_unchecked(read + 1)) };
                            let third = unsafe { *(src.get_unchecked(read + 2)) };
                            let fourth = unsafe { *(src.get_unchecked(read + 3)) };
                            if (u16::from(
                                UTF8_DATA.table[usize::from(second)]
                                    & unsafe {
                                        *(UTF8_DATA.table.get_unchecked(byte as usize + 0x80))
                                    },
                            ) | u16::from(third >> 6)
                                | (u16::from(fourth & 0xC0) << 2))
                                != 0x202
                            {
                                return true;
                            }
                            // F0 90 A0.. is U+10800.. and F0 9E A0.. is
                            // U+1E800..; both run to the end of their
                            // second-byte block.
                            if unlikely(second == 0x90 || second == 0x9E) {
                                // Re-reads the same byte as `third` above,
                                // this time with a bounds check.
                                let third = src[read + 2];
                                if third >= 0xA0 {
                                    return true;
                                }
                            }
                            read += 4;
                        }
                        _ => {
                            // Continuation byte in lead position, invalid
                            // lead (0xC0, 0xC1, 0xF5..), or a two-byte lead
                            // 0xD7..=0xDF whose range is RTL.
                            return true;
                        }
                    }
                    // Leave the fast path once fewer than four bytes remain.
                    if read + 4 > src.len() {
                        if read == src.len() {
                            return false;
                        }
                        byte = src[read];
                        break 'inner;
                    }
                    byte = src[read];
                    continue 'inner;
                }
            }
            // Slow path: at most three bytes remain from `read`, so a
            // four-byte sequence cannot be complete and every length is
            // checked explicitly before the unchecked reads.
            match byte {
                0..=0x7F => {
                    // ASCII: hand the rest back to `validate_ascii`.
                    read += 1;
                    src = &src[read..];
                    continue 'outer;
                }
                0xC2..=0xD5 => {
                    // Two-byte sequence, never RTL.
                    let new_read = read + 2;
                    if new_read > src.len() {
                        return true;
                    }
                    let second = unsafe { *(src.get_unchecked(read + 1)) };
                    if !in_inclusive_range8(second, 0x80, 0xBF) {
                        return true;
                    }
                    read = new_read;
                    // Up to one byte may remain after this sequence; restart
                    // so that it gets examined.
                    src = &src[read..];
                    continue 'outer;
                }
                0xD6 => {
                    // Two-byte sequence U+0580..=U+05BF.
                    let new_read = read + 2;
                    if new_read > src.len() {
                        return true;
                    }
                    let second = unsafe { *(src.get_unchecked(read + 1)) };
                    if !in_inclusive_range8(second, 0x80, 0xBF) {
                        return true;
                    }
                    // D6 90 is U+0590, the start of the RTL range.
                    if second > 0x8F {
                        return true;
                    }
                    read = new_read;
                    // Up to one byte may remain after this sequence; restart
                    // so that it gets examined.
                    src = &src[read..];
                    continue 'outer;
                }
                // From here on a sequence, if complete, uses up all the
                // remaining bytes, so the arms fall through to the final
                // `return false` instead of looping.
                0xE1 | 0xE3..=0xEC | 0xEE => {
                    // Three-byte sequence with no RTL content.
                    let new_read = read + 3;
                    if new_read > src.len() {
                        return true;
                    }
                    let second = unsafe { *(src.get_unchecked(read + 1)) };
                    let third = unsafe { *(src.get_unchecked(read + 2)) };
                    if ((UTF8_DATA.table[usize::from(second)]
                        & unsafe { *(UTF8_DATA.table.get_unchecked(byte as usize + 0x80)) })
                        | (third >> 6))
                        != 2
                    {
                        return true;
                    }
                }
                0xE2 => {
                    // Three-byte sequence containing the RTL controls.
                    let new_read = read + 3;
                    if new_read > src.len() {
                        return true;
                    }
                    let second = unsafe { *(src.get_unchecked(read + 1)) };
                    let third = unsafe { *(src.get_unchecked(read + 2)) };
                    if ((UTF8_DATA.table[usize::from(second)]
                        & unsafe { *(UTF8_DATA.table.get_unchecked(byte as usize + 0x80)) })
                        | (third >> 6))
                        != 2
                    {
                        return true;
                    }
                    if second == 0x80 {
                        // U+200F, U+202B, U+202E.
                        if third == 0x8F || third == 0xAB || third == 0xAE {
                            return true;
                        }
                    } else if second == 0x81 {
                        // U+2067.
                        if third == 0xA7 {
                            return true;
                        }
                    }
                }
                0xEF => {
                    // Three-byte sequence containing the RTL presentation
                    // forms; same sub-ranges as in the fast path.
                    let new_read = read + 3;
                    if new_read > src.len() {
                        return true;
                    }
                    let second = unsafe { *(src.get_unchecked(read + 1)) };
                    let third = unsafe { *(src.get_unchecked(read + 2)) };
                    if ((UTF8_DATA.table[usize::from(second)]
                        & unsafe { *(UTF8_DATA.table.get_unchecked(byte as usize + 0x80)) })
                        | (third >> 6))
                        != 2
                    {
                        return true;
                    }
                    if in_inclusive_range8(second, 0xAC, 0xB7) {
                        if second == 0xAC {
                            if third > 0x9C {
                                return true;
                            }
                        } else {
                            return true;
                        }
                    } else if in_inclusive_range8(second, 0xB9, 0xBB) {
                        if second == 0xB9 {
                            if third > 0xAF {
                                return true;
                            }
                        } else if second == 0xBB {
                            if third != 0xBF {
                                return true;
                            }
                        } else {
                            return true;
                        }
                    }
                }
                0xE0 => {
                    // Three-byte sequence U+0800..=U+0FFF.
                    let new_read = read + 3;
                    if new_read > src.len() {
                        return true;
                    }
                    let second = unsafe { *(src.get_unchecked(read + 1)) };
                    let third = unsafe { *(src.get_unchecked(read + 2)) };
                    if ((UTF8_DATA.table[usize::from(second)]
                        & unsafe { *(UTF8_DATA.table.get_unchecked(byte as usize + 0x80)) })
                        | (third >> 6))
                        != 2
                    {
                        return true;
                    }
                    // E0 A4 80 is U+0900; everything below is RTL.
                    if second < 0xA4 {
                        return true;
                    }
                }
                0xED => {
                    // Three-byte sequence led by 0xED: validity check only.
                    let new_read = read + 3;
                    if new_read > src.len() {
                        return true;
                    }
                    let second = unsafe { *(src.get_unchecked(read + 1)) };
                    let third = unsafe { *(src.get_unchecked(read + 2)) };
                    if ((UTF8_DATA.table[usize::from(second)]
                        & unsafe { *(UTF8_DATA.table.get_unchecked(byte as usize + 0x80)) })
                        | (third >> 6))
                        != 2
                    {
                        return true;
                    }
                }
                _ => {
                    // Invalid lead, a four-byte lead that cannot be complete
                    // here, or a two-byte lead whose range is RTL.
                    return true;
                }
            }
            return false;
        } else {
            // The remainder is entirely ASCII.
            return false;
        }
    }
}
1127
/// Checks whether `buffer` contains right-to-left content.
///
/// Returns `true` as soon as a character falls in one of the byte ranges
/// tested below, which correspond to the code point ranges accepted by
/// `is_char_bidi` (U+0590..=U+08FF, the RTL controls
/// U+200F/U+202B/U+202E/U+2067, U+FB1D..=U+FDFF, U+FE70..=U+FEFE,
/// U+10800..=U+10FFF, U+1E800..=U+1EFFF); returns `false` otherwise.
///
/// Because the input is a `&str`, the bytes are known to be valid UTF-8: no
/// validity checks are made and every lead byte is assumed to be followed by
/// the right number of continuation bytes.
#[cfg_attr(feature = "cargo-clippy", allow(collapsible_if))]
#[inline]
pub fn is_str_bidi(buffer: &str) -> bool {
    let mut bytes = buffer.as_bytes();
    'outer: loop {
        // `validate_ascii` is defined elsewhere; from its use here it yields
        // the first non-ASCII byte and its index, or `None` for all-ASCII.
        if let Some((mut byte, mut read)) = validate_ascii(bytes) {
            'inner: loop {
                // Invariant: `byte == bytes[read]`, i.e. `read` has not yet
                // been advanced past `byte`.
                if byte < 0xE0 {
                    if byte >= 0x80 {
                        // Two-byte sequence.
                        if unlikely(byte >= 0xD6) {
                            if byte == 0xD6 {
                                // D6 90 is U+0590, the start of the RTL range.
                                let second = bytes[read + 1];
                                if second > 0x8F {
                                    return true;
                                }
                            } else {
                                // Leads 0xD7..=0xDF: U+05C0..=U+07FF, all RTL.
                                return true;
                            }
                        }
                        read += 2;
                    } else {
                        // ASCII: hand the rest back to `validate_ascii`.
                        read += 1;
                        bytes = &bytes[read..];
                        continue 'outer;
                    }
                } else if byte < 0xF0 {
                    // Three-byte sequence. Only leads 0xE0, 0xE2 and 0xEF
                    // need a closer look; 0xE1 and 0xE3..=0xEE are skipped.
                    if unlikely(!in_inclusive_range8(byte, 0xE3, 0xEE) && byte != 0xE1) {
                        let second = bytes[read + 1];
                        if byte == 0xE0 {
                            // E0 A4 80 is U+0900; everything below is RTL.
                            if second < 0xA4 {
                                return true;
                            }
                        } else if byte == 0xE2 {
                            let third = bytes[read + 2];
                            if second == 0x80 {
                                // U+200F, U+202B, U+202E.
                                if third == 0x8F || third == 0xAB || third == 0xAE {
                                    return true;
                                }
                            } else if second == 0x81 {
                                // U+2067.
                                if third == 0xA7 {
                                    return true;
                                }
                            }
                        } else {
                            debug_assert_eq!(byte, 0xEF);
                            if in_inclusive_range8(second, 0xAC, 0xB7) {
                                // U+FB00..=U+FDFF; RTL from U+FB1D (EF AC 9D).
                                if second == 0xAC {
                                    let third = bytes[read + 2];
                                    if third > 0x9C {
                                        return true;
                                    }
                                } else {
                                    return true;
                                }
                            } else if in_inclusive_range8(second, 0xB9, 0xBB) {
                                // U+FE40..=U+FEFF; RTL is U+FE70..=U+FEFE.
                                if second == 0xB9 {
                                    // EF B9 B0 is U+FE70.
                                    let third = bytes[read + 2];
                                    if third > 0xAF {
                                        return true;
                                    }
                                } else if second == 0xBB {
                                    // EF BB BF is U+FEFF, the only non-RTL
                                    // code point with this second byte.
                                    let third = bytes[read + 2];
                                    if third != 0xBF {
                                        return true;
                                    }
                                } else {
                                    return true;
                                }
                            }
                        }
                    }
                    read += 3;
                } else {
                    // Four-byte sequence.
                    let second = bytes[read + 1];
                    // F0 90 A0.. is U+10800.. and F0 9E A0.. is U+1E800..;
                    // both run to the end of their second-byte block.
                    if unlikely(byte == 0xF0 && (second == 0x90 || second == 0x9E)) {
                        let third = bytes[read + 2];
                        if third >= 0xA0 {
                            return true;
                        }
                    }
                    read += 4;
                }
                // For valid UTF-8 `read` never exceeds the length; `>=` is
                // used rather than `==` for the end-of-input test.
                if read >= bytes.len() {
                    return false;
                }
                byte = bytes[read];
                continue 'inner;
            }
        } else {
            // The remainder is entirely ASCII.
            return false;
        }
    }
}
1283
/// Checks whether a UTF-16 buffer contains code units that the bidi checks
/// flag as right-to-left.
///
/// Delegates to `is_utf16_bidi_impl`, which applies the same per-code-unit
/// test as `is_utf16_code_unit_bidi` (vectorized when SIMD is enabled).
/// The buffer is examined one code unit at a time; surrogate pairing is not
/// validated.
pub fn is_utf16_bidi(buffer: &[u16]) -> bool {
    is_utf16_bidi_impl(buffer)
}
1304
1305#[inline(always)]
1317pub fn is_char_bidi(c: char) -> bool {
1318 let code_point = u32::from(c);
1338 if code_point < 0x0590 {
1339 return false;
1341 }
1342 if in_range32(code_point, 0x0900, 0xFB1D) {
1343 if in_inclusive_range32(code_point, 0x200F, 0x2067) {
1345 return code_point == 0x200F
1347 || code_point == 0x202B
1348 || code_point == 0x202E
1349 || code_point == 0x2067;
1350 }
1351 return false;
1352 }
1353 if code_point > 0x1EFFF {
1354 return false;
1356 }
1357 if in_range32(code_point, 0x11000, 0x1E800) {
1358 return false;
1360 }
1361 if in_range32(code_point, 0xFEFF, 0x10800) {
1362 return false;
1365 }
1366 if in_range32(code_point, 0xFE00, 0xFE70) {
1367 return false;
1369 }
1370 true
1371}
1372
1373#[inline(always)]
1392pub fn is_utf16_code_unit_bidi(u: u16) -> bool {
1393 if u < 0x0590 {
1394 return false;
1396 }
1397 if in_range16(u, 0x0900, 0xD802) {
1398 if in_inclusive_range16(u, 0x200F, 0x2067) {
1400 return u == 0x200F || u == 0x202B || u == 0x202E || u == 0x2067;
1402 }
1403 return false;
1404 }
1405 if in_range16(u, 0xD83C, 0xFB1D) {
1406 return false;
1409 }
1410 if in_range16(u, 0xD804, 0xD83A) {
1411 return false;
1413 }
1414 if u > 0xFEFE {
1415 return false;
1417 }
1418 if in_range16(u, 0xFE00, 0xFE70) {
1419 return false;
1421 }
1422 true
1423}
1424
1425pub fn check_utf8_for_latin1_and_bidi(buffer: &[u8]) -> Latin1Bidi {
1434 if let Some(offset) = is_utf8_latin1_impl(buffer) {
1435 if is_utf8_bidi(&buffer[offset..]) {
1436 Latin1Bidi::Bidi
1437 } else {
1438 Latin1Bidi::LeftToRight
1439 }
1440 } else {
1441 Latin1Bidi::Latin1
1442 }
1443}
1444
1445pub fn check_str_for_latin1_and_bidi(buffer: &str) -> Latin1Bidi {
1454 if let Some(offset) = is_str_latin1_impl(buffer) {
1457 if is_str_bidi(&buffer[offset..]) {
1458 Latin1Bidi::Bidi
1459 } else {
1460 Latin1Bidi::LeftToRight
1461 }
1462 } else {
1463 Latin1Bidi::Latin1
1464 }
1465}
1466
/// Classifies a UTF-16 buffer in one call.
///
/// Delegates to `check_utf16_for_latin1_and_bidi_impl`, which returns
/// `Latin1Bidi::Latin1` when no code unit is above 0xFF, `Latin1Bidi::Bidi`
/// when a code unit flagged by the bidi checks is found after the first
/// non-Latin1 content, and `Latin1Bidi::LeftToRight` otherwise. Surrogate
/// pairing is not validated.
pub fn check_utf16_for_latin1_and_bidi(buffer: &[u16]) -> Latin1Bidi {
    check_utf16_for_latin1_and_bidi_impl(buffer)
}
1478
1479pub fn convert_utf8_to_utf16(src: &[u8], dst: &mut [u16]) -> usize {
1491 assert!(dst.len() > src.len());
1494 let mut decoder = Utf8Decoder::new_inner();
1495 let mut total_read = 0usize;
1496 let mut total_written = 0usize;
1497 loop {
1498 let (result, read, written) =
1499 decoder.decode_to_utf16_raw(&src[total_read..], &mut dst[total_written..], true);
1500 total_read += read;
1501 total_written += written;
1502 match result {
1503 DecoderResult::InputEmpty => {
1504 return total_written;
1505 }
1506 DecoderResult::OutputFull => {
1507 unreachable!("The assert at the top of the function should have caught this.");
1508 }
1509 DecoderResult::Malformed(_, _) => {
1510 dst[total_written] = 0xFFFD;
1513 total_written += 1;
1514 }
1515 }
1516 }
1517}
1518
/// Converts valid UTF-8 (`&str`) to UTF-16.
///
/// Returns the number of `u16` code units written to `dst`.
///
/// No validation is performed: the function relies on `src` being valid
/// UTF-8, which the `&str` type guarantees.
///
/// # Panics
///
/// Panics if `dst` is shorter than `src` (in bytes vs. code units).
pub fn convert_str_to_utf16(src: &str, dst: &mut [u16]) -> usize {
    assert!(
        dst.len() >= src.len(),
        "Destination must not be shorter than the source."
    );
    let bytes = src.as_bytes();
    let mut read = 0;
    let mut written = 0;
    'outer: loop {
        // Bulk-copy the leading ASCII run; `byte` becomes the first
        // non-ASCII byte, which is not yet counted in `read`.
        let mut byte = {
            let src_remaining = &bytes[read..];
            let dst_remaining = &mut dst[written..];
            let length = src_remaining.len();
            // `ascii_to_basic_latin` is defined elsewhere. From its use here:
            // `None` means all `length` bytes were ASCII and were written;
            // `Some((b, n))` means `n` bytes were written and `b` is the
            // non-ASCII byte that follows. `dst_remaining` holds at least
            // `length` units because of the assert above and because
            // `written` never exceeds `read`.
            match unsafe {
                ascii_to_basic_latin(src_remaining.as_ptr(), dst_remaining.as_mut_ptr(), length)
            } {
                None => {
                    written += length;
                    return written;
                }
                Some((non_ascii, consumed)) => {
                    read += consumed;
                    written += consumed;
                    non_ascii
                }
            }
        };
        'inner: loop {
            // Invariant: `byte == bytes[read]`. The unchecked reads below are
            // in bounds because valid UTF-8 always has the continuation bytes
            // its lead byte announces; the unchecked writes are in bounds
            // because each sequence produces no more code units than it has
            // bytes.
            if byte < 0xE0 {
                if byte >= 0x80 {
                    // Two-byte sequence: 5 + 6 payload bits.
                    let second = unsafe { *(bytes.get_unchecked(read + 1)) };
                    let point = ((u16::from(byte) & 0x1F) << 6) | (u16::from(second) & 0x3F);
                    unsafe { *(dst.get_unchecked_mut(written)) = point };
                    read += 2;
                    written += 1;
                } else {
                    // ASCII: write it and go back to the bulk ASCII copy.
                    unsafe { *(dst.get_unchecked_mut(written)) = u16::from(byte) };
                    read += 1;
                    written += 1;
                    continue 'outer;
                }
            } else if byte < 0xF0 {
                // Three-byte sequence: 4 + 6 + 6 payload bits.
                let second = unsafe { *(bytes.get_unchecked(read + 1)) };
                let third = unsafe { *(bytes.get_unchecked(read + 2)) };
                let point = ((u16::from(byte) & 0xF) << 12)
                    | ((u16::from(second) & 0x3F) << 6)
                    | (u16::from(third) & 0x3F);
                unsafe { *(dst.get_unchecked_mut(written)) = point };
                read += 3;
                written += 1;
            } else {
                // Four-byte sequence: 3 + 6 + 6 + 6 payload bits, written as
                // a surrogate pair.
                let second = unsafe { *(bytes.get_unchecked(read + 1)) };
                let third = unsafe { *(bytes.get_unchecked(read + 2)) };
                let fourth = unsafe { *(bytes.get_unchecked(read + 3)) };
                let point = ((u32::from(byte) & 0x7) << 18)
                    | ((u32::from(second) & 0x3F) << 12)
                    | ((u32::from(third) & 0x3F) << 6)
                    | (u32::from(fourth) & 0x3F);
                // 0xD7C0 is 0xD800 - (0x10000 >> 10): the subtraction of
                // 0x10000 is folded into the high-surrogate base.
                unsafe { *(dst.get_unchecked_mut(written)) = (0xD7C0 + (point >> 10)) as u16 };
                unsafe {
                    *(dst.get_unchecked_mut(written + 1)) = (0xDC00 + (point & 0x3FF)) as u16
                };
                read += 4;
                written += 2;
            }
            // For valid UTF-8 `read` never exceeds the length; `>=` is used
            // rather than `==` for the end-of-input test.
            if read >= src.len() {
                return written;
            }
            byte = bytes[read];
            continue 'inner;
        }
    }
}
1615
1616pub fn convert_utf8_to_utf16_without_replacement(src: &[u8], dst: &mut [u16]) -> Option<usize> {
1629 assert!(
1630 dst.len() >= src.len(),
1631 "Destination must not be shorter than the source."
1632 );
1633 let (read, written) = convert_utf8_to_utf16_up_to_invalid(src, dst);
1634 if read == src.len() {
1635 return Some(written);
1636 }
1637 None
1638}
1639
1640#[inline(always)]
1666pub fn convert_utf16_to_utf8_partial(src: &[u16], dst: &mut [u8]) -> (usize, usize) {
1667 let (read, written) = convert_utf16_to_utf8_partial_inner(src, dst);
1675 if likely(read == src.len()) {
1676 return (read, written);
1677 }
1678 let (tail_read, tail_written) =
1679 convert_utf16_to_utf8_partial_tail(&src[read..], &mut dst[written..]);
1680 (read + tail_read, written + tail_written)
1681}
1682
1683#[inline(always)]
1701pub fn convert_utf16_to_utf8(src: &[u16], dst: &mut [u8]) -> usize {
1702 assert!(dst.len() >= src.len() * 3);
1703 let (read, written) = convert_utf16_to_utf8_partial(src, dst);
1704 debug_assert_eq!(read, src.len());
1705 written
1706}
1707
1708pub fn convert_utf16_to_str_partial(src: &[u16], dst: &mut str) -> (usize, usize) {
1722 let bytes: &mut [u8] = unsafe { dst.as_bytes_mut() };
1723 let (read, written) = convert_utf16_to_utf8_partial(src, bytes);
1724 let len = bytes.len();
1725 let mut trail = written;
1726 while trail < len && ((bytes[trail] & 0xC0) == 0x80) {
1727 bytes[trail] = 0;
1728 trail += 1;
1729 }
1730 (read, written)
1731}
1732
1733#[inline(always)]
1746pub fn convert_utf16_to_str(src: &[u16], dst: &mut str) -> usize {
1747 assert!(dst.len() >= src.len() * 3);
1748 let (read, written) = convert_utf16_to_str_partial(src, dst);
1749 debug_assert_eq!(read, src.len());
1750 written
1751}
1752
1753pub fn convert_latin1_to_utf16(src: &[u8], dst: &mut [u16]) {
1765 assert!(
1766 dst.len() >= src.len(),
1767 "Destination must not be shorter than the source."
1768 );
1769 unsafe {
1773 unpack_latin1(src.as_ptr(), dst.as_mut_ptr(), src.len());
1774 }
1775}
1776
1777pub fn convert_latin1_to_utf8_partial(src: &[u8], dst: &mut [u8]) -> (usize, usize) {
1791 let src_len = src.len();
1792 let src_ptr = src.as_ptr();
1793 let dst_ptr = dst.as_mut_ptr();
1794 let dst_len = dst.len();
1795 let mut total_read = 0usize;
1796 let mut total_written = 0usize;
1797 loop {
1798 let src_left = src_len - total_read;
1800 let dst_left = dst_len - total_written;
1801 let min_left = ::core::cmp::min(src_left, dst_left);
1802 if let Some((non_ascii, consumed)) = unsafe {
1803 ascii_to_ascii(
1804 src_ptr.add(total_read),
1805 dst_ptr.add(total_written),
1806 min_left,
1807 )
1808 } {
1809 total_read += consumed;
1810 total_written += consumed;
1811 if total_written.checked_add(2).unwrap() > dst_len {
1812 return (total_read, total_written);
1813 }
1814
1815 total_read += 1; dst[total_written] = (non_ascii >> 6) | 0xC0;
1818 total_written += 1;
1819 dst[total_written] = (non_ascii & 0x3F) | 0x80;
1820 total_written += 1;
1821 continue;
1822 }
1823 return (total_read + min_left, total_written + min_left);
1824 }
1825}
1826
1827#[inline]
1846pub fn convert_latin1_to_utf8(src: &[u8], dst: &mut [u8]) -> usize {
1847 assert!(
1848 dst.len() >= src.len() * 2,
1849 "Destination must not be shorter than the source times two."
1850 );
1851 let (read, written) = convert_latin1_to_utf8_partial(src, dst);
1852 debug_assert_eq!(read, src.len());
1853 written
1854}
1855
1856#[inline]
1865pub fn convert_latin1_to_str_partial(src: &[u8], dst: &mut str) -> (usize, usize) {
1866 let bytes: &mut [u8] = unsafe { dst.as_bytes_mut() };
1867 let (read, written) = convert_latin1_to_utf8_partial(src, bytes);
1868 let len = bytes.len();
1869 let mut trail = written;
1870 let max = ::core::cmp::min(len, trail + MAX_STRIDE_SIZE);
1871 while trail < max {
1872 bytes[trail] = 0;
1873 trail += 1;
1874 }
1875 while trail < len && ((bytes[trail] & 0xC0) == 0x80) {
1876 bytes[trail] = 0;
1877 trail += 1;
1878 }
1879 (read, written)
1880}
1881
1882#[inline]
1895pub fn convert_latin1_to_str(src: &[u8], dst: &mut str) -> usize {
1896 assert!(
1897 dst.len() >= src.len() * 2,
1898 "Destination must not be shorter than the source times two."
1899 );
1900 let (read, written) = convert_latin1_to_str_partial(src, dst);
1901 debug_assert_eq!(read, src.len());
1902 written
1903}
1904
1905pub fn convert_utf8_to_latin1_lossy(src: &[u8], dst: &mut [u8]) -> usize {
1929 assert!(
1930 dst.len() >= src.len(),
1931 "Destination must not be shorter than the source."
1932 );
1933 non_fuzz_debug_assert!(is_utf8_latin1(src));
1934 let src_len = src.len();
1935 let src_ptr = src.as_ptr();
1936 let dst_ptr = dst.as_mut_ptr();
1937 let mut total_read = 0usize;
1938 let mut total_written = 0usize;
1939 loop {
1940 let src_left = src_len - total_read;
1942 if let Some((non_ascii, consumed)) = unsafe {
1943 ascii_to_ascii(
1944 src_ptr.add(total_read),
1945 dst_ptr.add(total_written),
1946 src_left,
1947 )
1948 } {
1949 total_read += consumed + 1;
1950 total_written += consumed;
1951
1952 if total_read == src_len {
1953 return total_written;
1954 }
1955
1956 let trail = src[total_read];
1957 total_read += 1;
1958
1959 dst[total_written] = ((non_ascii & 0x1F) << 6) | (trail & 0x3F);
1960 total_written += 1;
1961 continue;
1962 }
1963 return total_written + src_left;
1964 }
1965}
1966
1967pub fn convert_utf16_to_latin1_lossy(src: &[u16], dst: &mut [u8]) {
1991 assert!(
1992 dst.len() >= src.len(),
1993 "Destination must not be shorter than the source."
1994 );
1995 unsafe {
1997 pack_latin1(src.as_ptr(), dst.as_mut_ptr(), src.len());
1998 }
1999}
2000
/// Decodes Latin1 `bytes` into a string, borrowing the input when possible.
///
/// If `ascii_valid_up_to` reports that the whole input is ASCII, the input is
/// reinterpreted as `&str` and returned as `Cow::Borrowed` without copying.
/// Otherwise a new buffer is allocated: the leading ASCII part is copied
/// verbatim and the rest is converted with `convert_latin1_to_utf8`; the
/// result is returned as `Cow::Owned`.
///
/// The output buffer is sized for the worst case of two output bytes per
/// non-prefix input byte and is zero-initialized before the conversion
/// writes into it, so no uninitialized memory is ever exposed as a slice.
#[cfg(feature = "alloc")]
pub fn decode_latin1<'a>(bytes: &'a [u8]) -> Cow<'a, str> {
    let up_to = ascii_valid_up_to(bytes);
    // `>=` rather than `==`: the two are equivalent here (see the debug
    // assertion) but the inequality also covers the impossible larger case.
    if up_to >= bytes.len() {
        debug_assert_eq!(up_to, bytes.len());
        // SAFETY: the helper reported the whole input as ASCII, and ASCII is
        // valid UTF-8.
        let s: &str = unsafe { ::core::str::from_utf8_unchecked(bytes) };
        return Cow::Borrowed(s);
    }
    let (head, tail) = bytes.split_at(up_to);
    let capacity = head.len() + tail.len() * 2;
    // Build the buffer from initialized data only: ASCII prefix first, then
    // zero padding that the converter overwrites.
    let mut vec: Vec<u8> = Vec::with_capacity(capacity);
    vec.extend_from_slice(head);
    vec.resize(capacity, 0);
    let written = convert_latin1_to_utf8(tail, &mut vec[up_to..]);
    vec.truncate(up_to + written);
    // SAFETY: the buffer holds an ASCII prefix followed by the converter's
    // output, which is relied upon to be well-formed UTF-8.
    Cow::Owned(unsafe { String::from_utf8_unchecked(vec) })
}
2028
/// Encodes `string` as Latin1 bytes, borrowing the input when possible.
///
/// If `ascii_valid_up_to` reports that the whole input is ASCII, the string's
/// own bytes are returned as `Cow::Borrowed` without copying. Otherwise a new
/// buffer is allocated: the leading ASCII part is copied verbatim and the
/// rest is converted with `convert_utf8_to_latin1_lossy`; the result is
/// returned as `Cow::Owned`. The preconditions and the "lossy" behavior of
/// that converter apply to the non-ASCII part.
///
/// The output buffer has the byte length of the input (the Latin1 form is
/// never longer) and is zero-initialized before the conversion writes into
/// it, so no uninitialized memory is ever exposed as a slice.
#[cfg(feature = "alloc")]
pub fn encode_latin1_lossy<'a>(string: &'a str) -> Cow<'a, [u8]> {
    let bytes = string.as_bytes();
    let up_to = ascii_valid_up_to(bytes);
    // `>=` rather than `==`: the two are equivalent here (see the debug
    // assertion) but the inequality also covers the impossible larger case.
    if up_to >= bytes.len() {
        debug_assert_eq!(up_to, bytes.len());
        return Cow::Borrowed(bytes);
    }
    let (head, tail) = bytes.split_at(up_to);
    let capacity = bytes.len();
    // Build the buffer from initialized data only: ASCII prefix first, then
    // zero padding that the converter overwrites.
    let mut vec: Vec<u8> = Vec::with_capacity(capacity);
    vec.extend_from_slice(head);
    vec.resize(capacity, 0);
    let written = convert_utf8_to_latin1_lossy(tail, &mut vec[up_to..]);
    vec.truncate(up_to + written);
    Cow::Owned(vec)
}
2065
/// Returns the index of the first code unit in `buffer` that makes the
/// buffer invalid as UTF-16 (for example an unpaired surrogate), or the
/// length of `buffer` if the whole buffer is valid.
///
/// This is a thin public wrapper around `utf16_valid_up_to_impl`.
pub fn utf16_valid_up_to(buffer: &[u16]) -> usize {
    utf16_valid_up_to_impl(buffer)
}
2071
2072pub fn utf8_latin1_up_to(buffer: &[u8]) -> usize {
2076 is_utf8_latin1_impl(buffer).unwrap_or(buffer.len())
2077}
2078
2079pub fn str_latin1_up_to(buffer: &str) -> usize {
2082 is_str_latin1_impl(buffer).unwrap_or_else(|| buffer.len())
2083}
2084
2085#[inline]
2087pub fn ensure_utf16_validity(buffer: &mut [u16]) {
2088 let mut offset = 0;
2089 loop {
2090 offset += utf16_valid_up_to(&buffer[offset..]);
2091 if offset == buffer.len() {
2092 return;
2093 }
2094 buffer[offset] = 0xFFFD;
2095 offset += 1;
2096 }
2097}
2098
2099pub fn copy_ascii_to_ascii(src: &[u8], dst: &mut [u8]) -> usize {
2111 assert!(
2112 dst.len() >= src.len(),
2113 "Destination must not be shorter than the source."
2114 );
2115 if let Some((_, consumed)) =
2116 unsafe { ascii_to_ascii(src.as_ptr(), dst.as_mut_ptr(), src.len()) }
2117 {
2118 consumed
2119 } else {
2120 src.len()
2121 }
2122}
2123
2124pub fn copy_ascii_to_basic_latin(src: &[u8], dst: &mut [u16]) -> usize {
2137 assert!(
2138 dst.len() >= src.len(),
2139 "Destination must not be shorter than the source."
2140 );
2141 if let Some((_, consumed)) =
2142 unsafe { ascii_to_basic_latin(src.as_ptr(), dst.as_mut_ptr(), src.len()) }
2143 {
2144 consumed
2145 } else {
2146 src.len()
2147 }
2148}
2149
2150pub fn copy_basic_latin_to_ascii(src: &[u16], dst: &mut [u8]) -> usize {
2163 assert!(
2164 dst.len() >= src.len(),
2165 "Destination must not be shorter than the source."
2166 );
2167 if let Some((_, consumed)) =
2168 unsafe { basic_latin_to_ascii(src.as_ptr(), dst.as_mut_ptr(), src.len()) }
2169 {
2170 consumed
2171 } else {
2172 src.len()
2173 }
2174}
2175
2176#[cfg(all(test, feature = "alloc"))]
2180mod tests {
2181 use super::*;
2182
2183 #[test]
2184 fn test_is_ascii_success() {
2185 let mut src: Vec<u8> = Vec::with_capacity(128);
2186 src.resize(128, 0);
2187 for i in 0..src.len() {
2188 src[i] = i as u8;
2189 }
2190 for i in 0..src.len() {
2191 assert!(is_ascii(&src[i..]));
2192 }
2193 }
2194
2195 #[test]
2196 fn test_is_ascii_fail() {
2197 let mut src: Vec<u8> = Vec::with_capacity(128);
2198 src.resize(128, 0);
2199 for i in 0..src.len() {
2200 src[i] = i as u8;
2201 }
2202 for i in 0..src.len() {
2203 let tail = &mut src[i..];
2204 for j in 0..tail.len() {
2205 tail[j] = 0xA0;
2206 assert!(!is_ascii(tail));
2207 }
2208 }
2209 }
2210
2211 #[test]
2212 fn test_is_basic_latin_success() {
2213 let mut src: Vec<u16> = Vec::with_capacity(128);
2214 src.resize(128, 0);
2215 for i in 0..src.len() {
2216 src[i] = i as u16;
2217 }
2218 for i in 0..src.len() {
2219 assert!(is_basic_latin(&src[i..]));
2220 }
2221 }
2222
2223 #[test]
2224 fn test_is_basic_latin_fail() {
2225 let mut src: Vec<u16> = Vec::with_capacity(128);
2226 src.resize(128, 0);
2227 for i in 0..src.len() {
2228 src[i] = i as u16;
2229 }
2230 for i in 0..src.len() {
2231 let tail = &mut src[i..];
2232 for j in 0..tail.len() {
2233 tail[j] = 0xA0;
2234 assert!(!is_basic_latin(tail));
2235 }
2236 }
2237 }
2238
2239 #[test]
2240 fn test_is_utf16_latin1_success() {
2241 let mut src: Vec<u16> = Vec::with_capacity(256);
2242 src.resize(256, 0);
2243 for i in 0..src.len() {
2244 src[i] = i as u16;
2245 }
2246 for i in 0..src.len() {
2247 assert!(is_utf16_latin1(&src[i..]));
2248 assert_eq!(
2249 check_utf16_for_latin1_and_bidi(&src[i..]),
2250 Latin1Bidi::Latin1
2251 );
2252 }
2253 }
2254
2255 #[test]
2256 fn test_is_utf16_latin1_fail() {
2257 let len = if cfg!(miri) { 64 } else { 256 }; let mut src: Vec<u16> = Vec::with_capacity(len);
2259 src.resize(len, 0);
2260 for i in 0..src.len() {
2261 src[i] = i as u16;
2262 }
2263 for i in 0..src.len() {
2264 let tail = &mut src[i..];
2265 for j in 0..tail.len() {
2266 tail[j] = 0x100 + j as u16;
2267 assert!(!is_utf16_latin1(tail));
2268 assert_ne!(check_utf16_for_latin1_and_bidi(tail), Latin1Bidi::Latin1);
2269 }
2270 }
2271 }
2272
2273 #[test]
2274 fn test_is_str_latin1_success() {
2275 let len = if cfg!(miri) { 64 } else { 256 }; let mut src: Vec<u16> = Vec::with_capacity(len);
2277 src.resize(len, 0);
2278 for i in 0..src.len() {
2279 src[i] = i as u16;
2280 }
2281 for i in 0..src.len() {
2282 let s = String::from_utf16(&src[i..]).unwrap();
2283 assert!(is_str_latin1(&s[..]));
2284 assert_eq!(check_str_for_latin1_and_bidi(&s[..]), Latin1Bidi::Latin1);
2285 }
2286 }
2287
2288 #[test]
2289 fn test_is_str_latin1_fail() {
2290 let len = if cfg!(miri) { 32 } else { 256 }; let mut src: Vec<u16> = Vec::with_capacity(len);
2292 src.resize(len, 0);
2293 for i in 0..src.len() {
2294 src[i] = i as u16;
2295 }
2296 for i in 0..src.len() {
2297 let tail = &mut src[i..];
2298 for j in 0..tail.len() {
2299 tail[j] = 0x100 + j as u16;
2300 let s = String::from_utf16(tail).unwrap();
2301 assert!(!is_str_latin1(&s[..]));
2302 assert_ne!(check_str_for_latin1_and_bidi(&s[..]), Latin1Bidi::Latin1);
2303 }
2304 }
2305 }
2306
2307 #[test]
2308 fn test_is_utf8_latin1_success() {
2309 let len = if cfg!(miri) { 64 } else { 256 }; let mut src: Vec<u16> = Vec::with_capacity(len);
2311 src.resize(len, 0);
2312 for i in 0..src.len() {
2313 src[i] = i as u16;
2314 }
2315 for i in 0..src.len() {
2316 let s = String::from_utf16(&src[i..]).unwrap();
2317 assert!(is_utf8_latin1(s.as_bytes()));
2318 assert_eq!(
2319 check_utf8_for_latin1_and_bidi(s.as_bytes()),
2320 Latin1Bidi::Latin1
2321 );
2322 }
2323 }
2324
2325 #[test]
2326 fn test_is_utf8_latin1_fail() {
2327 let len = if cfg!(miri) { 32 } else { 256 }; let mut src: Vec<u16> = Vec::with_capacity(len);
2329 src.resize(len, 0);
2330 for i in 0..src.len() {
2331 src[i] = i as u16;
2332 }
2333 for i in 0..src.len() {
2334 let tail = &mut src[i..];
2335 for j in 0..tail.len() {
2336 tail[j] = 0x100 + j as u16;
2337 let s = String::from_utf16(tail).unwrap();
2338 assert!(!is_utf8_latin1(s.as_bytes()));
2339 assert_ne!(
2340 check_utf8_for_latin1_and_bidi(s.as_bytes()),
2341 Latin1Bidi::Latin1
2342 );
2343 }
2344 }
2345 }
2346
2347 #[test]
2348 fn test_is_utf8_latin1_invalid() {
2349 assert!(!is_utf8_latin1(b"\xC3"));
2350 assert!(!is_utf8_latin1(b"a\xC3"));
2351 assert!(!is_utf8_latin1(b"\xFF"));
2352 assert!(!is_utf8_latin1(b"a\xFF"));
2353 assert!(!is_utf8_latin1(b"\xC3\xFF"));
2354 assert!(!is_utf8_latin1(b"a\xC3\xFF"));
2355 }
2356
2357 #[test]
2358 fn test_convert_utf8_to_utf16() {
2359 let src = "abcdefghijklmnopqrstu\u{1F4A9}v\u{2603}w\u{00B6}xyzz";
2360 let mut dst: Vec<u16> = Vec::with_capacity(src.len() + 1);
2361 dst.resize(src.len() + 1, 0);
2362 let len = convert_utf8_to_utf16(src.as_bytes(), &mut dst[..]);
2363 dst.truncate(len);
2364 let reference: Vec<u16> = src.encode_utf16().collect();
2365 assert_eq!(dst, reference);
2366 }
2367
2368 #[test]
2369 fn test_convert_str_to_utf16() {
2370 let src = "abcdefghijklmnopqrstu\u{1F4A9}v\u{2603}w\u{00B6}xyzz";
2371 let mut dst: Vec<u16> = Vec::with_capacity(src.len());
2372 dst.resize(src.len(), 0);
2373 let len = convert_str_to_utf16(src, &mut dst[..]);
2374 dst.truncate(len);
2375 let reference: Vec<u16> = src.encode_utf16().collect();
2376 assert_eq!(dst, reference);
2377 }
2378
2379 #[test]
2380 fn test_convert_utf16_to_utf8_partial() {
2381 let reference = "abcdefghijklmnopqrstu\u{1F4A9}v\u{2603}w\u{00B6}xyzz";
2382 let src: Vec<u16> = reference.encode_utf16().collect();
2383 let mut dst: Vec<u8> = Vec::with_capacity(src.len() * 3 + 1);
2384 dst.resize(src.len() * 3 + 1, 0);
2385 let (read, written) = convert_utf16_to_utf8_partial(&src[..], &mut dst[..24]);
2386 let len = written + convert_utf16_to_utf8(&src[read..], &mut dst[written..]);
2387 dst.truncate(len);
2388 assert_eq!(dst, reference.as_bytes());
2389 }
2390
2391 #[test]
2392 fn test_convert_utf16_to_utf8() {
2393 let reference = "abcdefghijklmnopqrstu\u{1F4A9}v\u{2603}w\u{00B6}xyzz";
2394 let src: Vec<u16> = reference.encode_utf16().collect();
2395 let mut dst: Vec<u8> = Vec::with_capacity(src.len() * 3 + 1);
2396 dst.resize(src.len() * 3 + 1, 0);
2397 let len = convert_utf16_to_utf8(&src[..], &mut dst[..]);
2398 dst.truncate(len);
2399 assert_eq!(dst, reference.as_bytes());
2400 }
2401
2402 #[test]
2403 fn test_convert_latin1_to_utf16() {
2404 let mut src: Vec<u8> = Vec::with_capacity(256);
2405 src.resize(256, 0);
2406 let mut reference: Vec<u16> = Vec::with_capacity(256);
2407 reference.resize(256, 0);
2408 for i in 0..256 {
2409 src[i] = i as u8;
2410 reference[i] = i as u16;
2411 }
2412 let mut dst: Vec<u16> = Vec::with_capacity(src.len());
2413 dst.resize(src.len(), 0);
2414 convert_latin1_to_utf16(&src[..], &mut dst[..]);
2415 assert_eq!(dst, reference);
2416 }
2417
2418 #[test]
2419 fn test_convert_latin1_to_utf8_partial() {
2420 let mut dst = [0u8, 2];
2421 let (read, written) = convert_latin1_to_utf8_partial(b"a\xFF", &mut dst[..]);
2422 assert_eq!(read, 1);
2423 assert_eq!(written, 1);
2424 }
2425
2426 #[test]
2427 fn test_convert_latin1_to_utf8() {
2428 let mut src: Vec<u8> = Vec::with_capacity(256);
2429 src.resize(256, 0);
2430 let mut reference: Vec<u16> = Vec::with_capacity(256);
2431 reference.resize(256, 0);
2432 for i in 0..256 {
2433 src[i] = i as u8;
2434 reference[i] = i as u16;
2435 }
2436 let s = String::from_utf16(&reference[..]).unwrap();
2437 let mut dst: Vec<u8> = Vec::with_capacity(src.len() * 2);
2438 dst.resize(src.len() * 2, 0);
2439 let len = convert_latin1_to_utf8(&src[..], &mut dst[..]);
2440 dst.truncate(len);
2441 assert_eq!(&dst[..], s.as_bytes());
2442 }
2443
2444 #[test]
2445 fn test_convert_utf8_to_latin1_lossy() {
2446 let mut reference: Vec<u8> = Vec::with_capacity(256);
2447 reference.resize(256, 0);
2448 let mut src16: Vec<u16> = Vec::with_capacity(256);
2449 src16.resize(256, 0);
2450 for i in 0..256 {
2451 src16[i] = i as u16;
2452 reference[i] = i as u8;
2453 }
2454 let src = String::from_utf16(&src16[..]).unwrap();
2455 let mut dst: Vec<u8> = Vec::with_capacity(src.len());
2456 dst.resize(src.len(), 0);
2457 let len = convert_utf8_to_latin1_lossy(src.as_bytes(), &mut dst[..]);
2458 dst.truncate(len);
2459 assert_eq!(dst, reference);
2460 }
2461
2462 #[cfg(all(debug_assertions, not(fuzzing)))]
2463 #[test]
2464 #[should_panic]
2465 fn test_convert_utf8_to_latin1_lossy_panics() {
2466 let mut dst = [0u8; 16];
2467 let _ = convert_utf8_to_latin1_lossy("\u{100}".as_bytes(), &mut dst[..]);
2468 }
2469
2470 #[test]
2471 fn test_convert_utf16_to_latin1_lossy() {
2472 let mut src: Vec<u16> = Vec::with_capacity(256);
2473 src.resize(256, 0);
2474 let mut reference: Vec<u8> = Vec::with_capacity(256);
2475 reference.resize(256, 0);
2476 for i in 0..256 {
2477 src[i] = i as u16;
2478 reference[i] = i as u8;
2479 }
2480 let mut dst: Vec<u8> = Vec::with_capacity(src.len());
2481 dst.resize(src.len(), 0);
2482 convert_utf16_to_latin1_lossy(&src[..], &mut dst[..]);
2483 assert_eq!(dst, reference);
2484 }
2485
2486 #[test]
2487 fn test_convert_utf16_to_latin1_lossy_panics() {
2489 let mut dst = [0u8; 16];
2490 let _ = convert_utf16_to_latin1_lossy(&[0x0100u16], &mut dst[..]);
2491 }
2492
2493 #[test]
2494 fn test_utf16_valid_up_to() {
2495 let valid = vec![
2496 0u16, 0u16, 0u16, 0u16, 0u16, 0u16, 0u16, 0u16, 0u16, 0u16, 0u16, 0u16, 0x2603u16,
2497 0xD83Du16, 0xDCA9u16, 0x00B6u16,
2498 ];
2499 assert_eq!(utf16_valid_up_to(&valid[..]), 16);
2500 let lone_high = vec![
2501 0u16, 0u16, 0u16, 0u16, 0u16, 0u16, 0u16, 0u16, 0u16, 0u16, 0u16, 0u16, 0u16,
2502 0x2603u16, 0xD83Du16, 0x00B6u16,
2503 ];
2504 assert_eq!(utf16_valid_up_to(&lone_high[..]), 14);
2505 let lone_low = vec![
2506 0u16, 0u16, 0u16, 0u16, 0u16, 0u16, 0u16, 0u16, 0u16, 0u16, 0u16, 0u16, 0u16,
2507 0x2603u16, 0xDCA9u16, 0x00B6u16,
2508 ];
2509 assert_eq!(utf16_valid_up_to(&lone_low[..]), 14);
2510 let lone_high_at_end = vec![
2511 0u16, 0u16, 0u16, 0u16, 0u16, 0u16, 0u16, 0u16, 0u16, 0u16, 0u16, 0u16, 0u16,
2512 0x2603u16, 0x00B6u16, 0xD83Du16,
2513 ];
2514 assert_eq!(utf16_valid_up_to(&lone_high_at_end[..]), 15);
2515 }
2516
2517 #[test]
2518 fn test_ensure_utf16_validity() {
2519 let mut src = vec![
2520 0u16, 0xD83Du16, 0u16, 0u16, 0u16, 0xD83Du16, 0xDCA9u16, 0u16, 0u16, 0u16, 0u16, 0u16,
2521 0u16, 0xDCA9u16, 0u16, 0u16, 0u16, 0u16, 0u16, 0u16, 0u16, 0u16, 0u16, 0u16, 0u16,
2522 0u16, 0u16, 0u16, 0u16, 0u16, 0u16,
2523 ];
2524 let reference = vec![
2525 0u16, 0xFFFDu16, 0u16, 0u16, 0u16, 0xD83Du16, 0xDCA9u16, 0u16, 0u16, 0u16, 0u16, 0u16,
2526 0u16, 0xFFFDu16, 0u16, 0u16, 0u16, 0u16, 0u16, 0u16, 0u16, 0u16, 0u16, 0u16, 0u16,
2527 0u16, 0u16, 0u16, 0u16, 0u16, 0u16,
2528 ];
2529 ensure_utf16_validity(&mut src[..]);
2530 assert_eq!(src, reference);
2531 }
2532
2533 #[test]
2534 fn test_is_char_bidi() {
2535 assert!(!is_char_bidi('a'));
2536 assert!(!is_char_bidi('\u{03B1}'));
2537 assert!(!is_char_bidi('\u{3041}'));
2538 assert!(!is_char_bidi('\u{1F4A9}'));
2539 assert!(!is_char_bidi('\u{FE00}'));
2540 assert!(!is_char_bidi('\u{202C}'));
2541 assert!(!is_char_bidi('\u{FEFF}'));
2542 assert!(is_char_bidi('\u{0590}'));
2543 assert!(is_char_bidi('\u{08FF}'));
2544 assert!(is_char_bidi('\u{061C}'));
2545 assert!(is_char_bidi('\u{FB50}'));
2546 assert!(is_char_bidi('\u{FDFF}'));
2547 assert!(is_char_bidi('\u{FE70}'));
2548 assert!(is_char_bidi('\u{FEFE}'));
2549 assert!(is_char_bidi('\u{200F}'));
2550 assert!(is_char_bidi('\u{202B}'));
2551 assert!(is_char_bidi('\u{202E}'));
2552 assert!(is_char_bidi('\u{2067}'));
2553 assert!(is_char_bidi('\u{10800}'));
2554 assert!(is_char_bidi('\u{10FFF}'));
2555 assert!(is_char_bidi('\u{1E800}'));
2556 assert!(is_char_bidi('\u{1EFFF}'));
2557 }
2558
2559 #[test]
2560 fn test_is_utf16_code_unit_bidi() {
2561 assert!(!is_utf16_code_unit_bidi(0x0062));
2562 assert!(!is_utf16_code_unit_bidi(0x03B1));
2563 assert!(!is_utf16_code_unit_bidi(0x3041));
2564 assert!(!is_utf16_code_unit_bidi(0xD801));
2565 assert!(!is_utf16_code_unit_bidi(0xFE00));
2566 assert!(!is_utf16_code_unit_bidi(0x202C));
2567 assert!(!is_utf16_code_unit_bidi(0xFEFF));
2568 assert!(is_utf16_code_unit_bidi(0x0590));
2569 assert!(is_utf16_code_unit_bidi(0x08FF));
2570 assert!(is_utf16_code_unit_bidi(0x061C));
2571 assert!(is_utf16_code_unit_bidi(0xFB1D));
2572 assert!(is_utf16_code_unit_bidi(0xFB50));
2573 assert!(is_utf16_code_unit_bidi(0xFDFF));
2574 assert!(is_utf16_code_unit_bidi(0xFE70));
2575 assert!(is_utf16_code_unit_bidi(0xFEFE));
2576 assert!(is_utf16_code_unit_bidi(0x200F));
2577 assert!(is_utf16_code_unit_bidi(0x202B));
2578 assert!(is_utf16_code_unit_bidi(0x202E));
2579 assert!(is_utf16_code_unit_bidi(0x2067));
2580 assert!(is_utf16_code_unit_bidi(0xD802));
2581 assert!(is_utf16_code_unit_bidi(0xD803));
2582 assert!(is_utf16_code_unit_bidi(0xD83A));
2583 assert!(is_utf16_code_unit_bidi(0xD83B));
2584 }
2585
2586 #[test]
2587 fn test_is_str_bidi() {
2588 assert!(!is_str_bidi("abcdefghijklmnopaabcdefghijklmnop"));
2589 assert!(!is_str_bidi("abcdefghijklmnop\u{03B1}abcdefghijklmnop"));
2590 assert!(!is_str_bidi("abcdefghijklmnop\u{3041}abcdefghijklmnop"));
2591 assert!(!is_str_bidi("abcdefghijklmnop\u{1F4A9}abcdefghijklmnop"));
2592 assert!(!is_str_bidi("abcdefghijklmnop\u{FE00}abcdefghijklmnop"));
2593 assert!(!is_str_bidi("abcdefghijklmnop\u{202C}abcdefghijklmnop"));
2594 assert!(!is_str_bidi("abcdefghijklmnop\u{FEFF}abcdefghijklmnop"));
2595 assert!(is_str_bidi("abcdefghijklmnop\u{0590}abcdefghijklmnop"));
2596 assert!(is_str_bidi("abcdefghijklmnop\u{08FF}abcdefghijklmnop"));
2597 assert!(is_str_bidi("abcdefghijklmnop\u{061C}abcdefghijklmnop"));
2598 assert!(is_str_bidi("abcdefghijklmnop\u{FB50}abcdefghijklmnop"));
2599 assert!(is_str_bidi("abcdefghijklmnop\u{FDFF}abcdefghijklmnop"));
2600 assert!(is_str_bidi("abcdefghijklmnop\u{FE70}abcdefghijklmnop"));
2601 assert!(is_str_bidi("abcdefghijklmnop\u{FEFE}abcdefghijklmnop"));
2602 assert!(is_str_bidi("abcdefghijklmnop\u{200F}abcdefghijklmnop"));
2603 assert!(is_str_bidi("abcdefghijklmnop\u{202B}abcdefghijklmnop"));
2604 assert!(is_str_bidi("abcdefghijklmnop\u{202E}abcdefghijklmnop"));
2605 assert!(is_str_bidi("abcdefghijklmnop\u{2067}abcdefghijklmnop"));
2606 assert!(is_str_bidi("abcdefghijklmnop\u{10800}abcdefghijklmnop"));
2607 assert!(is_str_bidi("abcdefghijklmnop\u{10FFF}abcdefghijklmnop"));
2608 assert!(is_str_bidi("abcdefghijklmnop\u{1E800}abcdefghijklmnop"));
2609 assert!(is_str_bidi("abcdefghijklmnop\u{1EFFF}abcdefghijklmnop"));
2610 }
2611
2612 #[test]
2613 fn test_is_utf8_bidi() {
2614 assert!(!is_utf8_bidi(
2615 "abcdefghijklmnopaabcdefghijklmnop".as_bytes()
2616 ));
2617 assert!(!is_utf8_bidi(
2618 "abcdefghijklmnop\u{03B1}abcdefghijklmnop".as_bytes()
2619 ));
2620 assert!(!is_utf8_bidi(
2621 "abcdefghijklmnop\u{3041}abcdefghijklmnop".as_bytes()
2622 ));
2623 assert!(!is_utf8_bidi(
2624 "abcdefghijklmnop\u{1F4A9}abcdefghijklmnop".as_bytes()
2625 ));
2626 assert!(!is_utf8_bidi(
2627 "abcdefghijklmnop\u{FE00}abcdefghijklmnop".as_bytes()
2628 ));
2629 assert!(!is_utf8_bidi(
2630 "abcdefghijklmnop\u{202C}abcdefghijklmnop".as_bytes()
2631 ));
2632 assert!(!is_utf8_bidi(
2633 "abcdefghijklmnop\u{FEFF}abcdefghijklmnop".as_bytes()
2634 ));
2635 assert!(is_utf8_bidi(
2636 "abcdefghijklmnop\u{0590}abcdefghijklmnop".as_bytes()
2637 ));
2638 assert!(is_utf8_bidi(
2639 "abcdefghijklmnop\u{08FF}abcdefghijklmnop".as_bytes()
2640 ));
2641 assert!(is_utf8_bidi(
2642 "abcdefghijklmnop\u{061C}abcdefghijklmnop".as_bytes()
2643 ));
2644 assert!(is_utf8_bidi(
2645 "abcdefghijklmnop\u{FB50}abcdefghijklmnop".as_bytes()
2646 ));
2647 assert!(is_utf8_bidi(
2648 "abcdefghijklmnop\u{FDFF}abcdefghijklmnop".as_bytes()
2649 ));
2650 assert!(is_utf8_bidi(
2651 "abcdefghijklmnop\u{FE70}abcdefghijklmnop".as_bytes()
2652 ));
2653 assert!(is_utf8_bidi(
2654 "abcdefghijklmnop\u{FEFE}abcdefghijklmnop".as_bytes()
2655 ));
2656 assert!(is_utf8_bidi(
2657 "abcdefghijklmnop\u{200F}abcdefghijklmnop".as_bytes()
2658 ));
2659 assert!(is_utf8_bidi(
2660 "abcdefghijklmnop\u{202B}abcdefghijklmnop".as_bytes()
2661 ));
2662 assert!(is_utf8_bidi(
2663 "abcdefghijklmnop\u{202E}abcdefghijklmnop".as_bytes()
2664 ));
2665 assert!(is_utf8_bidi(
2666 "abcdefghijklmnop\u{2067}abcdefghijklmnop".as_bytes()
2667 ));
2668 assert!(is_utf8_bidi(
2669 "abcdefghijklmnop\u{10800}abcdefghijklmnop".as_bytes()
2670 ));
2671 assert!(is_utf8_bidi(
2672 "abcdefghijklmnop\u{10FFF}abcdefghijklmnop".as_bytes()
2673 ));
2674 assert!(is_utf8_bidi(
2675 "abcdefghijklmnop\u{1E800}abcdefghijklmnop".as_bytes()
2676 ));
2677 assert!(is_utf8_bidi(
2678 "abcdefghijklmnop\u{1EFFF}abcdefghijklmnop".as_bytes()
2679 ));
2680 }
2681
2682 #[test]
2683 fn test_is_utf16_bidi() {
2684 assert!(!is_utf16_bidi(&[
2685 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x0062, 0x62, 0x63, 0x64, 0x65, 0x66,
2686 0x67, 0x68, 0x69,
2687 ]));
2688 assert!(!is_utf16_bidi(&[
2689 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x03B1, 0x62, 0x63, 0x64, 0x65, 0x66,
2690 0x67, 0x68, 0x69,
2691 ]));
2692 assert!(!is_utf16_bidi(&[
2693 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x3041, 0x62, 0x63, 0x64, 0x65, 0x66,
2694 0x67, 0x68, 0x69,
2695 ]));
2696 assert!(!is_utf16_bidi(&[
2697 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0xD801, 0x62, 0x63, 0x64, 0x65, 0x66,
2698 0x67, 0x68, 0x69,
2699 ]));
2700 assert!(!is_utf16_bidi(&[
2701 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0xFE00, 0x62, 0x63, 0x64, 0x65, 0x66,
2702 0x67, 0x68, 0x69,
2703 ]));
2704 assert!(!is_utf16_bidi(&[
2705 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x202C, 0x62, 0x63, 0x64, 0x65, 0x66,
2706 0x67, 0x68, 0x69,
2707 ]));
2708 assert!(!is_utf16_bidi(&[
2709 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0xFEFF, 0x62, 0x63, 0x64, 0x65, 0x66,
2710 0x67, 0x68, 0x69,
2711 ]));
2712 assert!(is_utf16_bidi(&[
2713 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x0590, 0x62, 0x63, 0x64, 0x65, 0x66,
2714 0x67, 0x68, 0x69,
2715 ]));
2716 assert!(is_utf16_bidi(&[
2717 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x08FF, 0x62, 0x63, 0x64, 0x65, 0x66,
2718 0x67, 0x68, 0x69,
2719 ]));
2720 assert!(is_utf16_bidi(&[
2721 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x061C, 0x62, 0x63, 0x64, 0x65, 0x66,
2722 0x67, 0x68, 0x69,
2723 ]));
2724 assert!(is_utf16_bidi(&[
2725 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0xFB1D, 0x62, 0x63, 0x64, 0x65, 0x66,
2726 0x67, 0x68, 0x69,
2727 ]));
2728 assert!(is_utf16_bidi(&[
2729 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0xFB50, 0x62, 0x63, 0x64, 0x65, 0x66,
2730 0x67, 0x68, 0x69,
2731 ]));
2732 assert!(is_utf16_bidi(&[
2733 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0xFDFF, 0x62, 0x63, 0x64, 0x65, 0x66,
2734 0x67, 0x68, 0x69,
2735 ]));
2736 assert!(is_utf16_bidi(&[
2737 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0xFE70, 0x62, 0x63, 0x64, 0x65, 0x66,
2738 0x67, 0x68, 0x69,
2739 ]));
2740 assert!(is_utf16_bidi(&[
2741 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0xFEFE, 0x62, 0x63, 0x64, 0x65, 0x66,
2742 0x67, 0x68, 0x69,
2743 ]));
2744 assert!(is_utf16_bidi(&[
2745 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x200F, 0x62, 0x63, 0x64, 0x65, 0x66,
2746 0x67, 0x68, 0x69,
2747 ]));
2748 assert!(is_utf16_bidi(&[
2749 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x202B, 0x62, 0x63, 0x64, 0x65, 0x66,
2750 0x67, 0x68, 0x69,
2751 ]));
2752 assert!(is_utf16_bidi(&[
2753 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x202E, 0x62, 0x63, 0x64, 0x65, 0x66,
2754 0x67, 0x68, 0x69,
2755 ]));
2756 assert!(is_utf16_bidi(&[
2757 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x2067, 0x62, 0x63, 0x64, 0x65, 0x66,
2758 0x67, 0x68, 0x69,
2759 ]));
2760 assert!(is_utf16_bidi(&[
2761 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0xD802, 0x62, 0x63, 0x64, 0x65, 0x66,
2762 0x67, 0x68, 0x69,
2763 ]));
2764 assert!(is_utf16_bidi(&[
2765 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0xD803, 0x62, 0x63, 0x64, 0x65, 0x66,
2766 0x67, 0x68, 0x69,
2767 ]));
2768 assert!(is_utf16_bidi(&[
2769 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0xD83A, 0x62, 0x63, 0x64, 0x65, 0x66,
2770 0x67, 0x68, 0x69,
2771 ]));
2772 assert!(is_utf16_bidi(&[
2773 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0xD83B, 0x62, 0x63, 0x64, 0x65, 0x66,
2774 0x67, 0x68, 0x69,
2775 ]));
2776
2777 assert!(is_utf16_bidi(&[
2778 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x0590, 0x3041, 0x62, 0x63, 0x64, 0x65,
2779 0x66, 0x67, 0x68, 0x69,
2780 ]));
2781 }
2782
2783 #[test]
2784 fn test_check_str_for_latin1_and_bidi() {
2785 assert_ne!(
2786 check_str_for_latin1_and_bidi("abcdefghijklmnopaabcdefghijklmnop"),
2787 Latin1Bidi::Bidi
2788 );
2789 assert_ne!(
2790 check_str_for_latin1_and_bidi("abcdefghijklmnop\u{03B1}abcdefghijklmnop"),
2791 Latin1Bidi::Bidi
2792 );
2793 assert_ne!(
2794 check_str_for_latin1_and_bidi("abcdefghijklmnop\u{3041}abcdefghijklmnop"),
2795 Latin1Bidi::Bidi
2796 );
2797 assert_ne!(
2798 check_str_for_latin1_and_bidi("abcdefghijklmnop\u{1F4A9}abcdefghijklmnop"),
2799 Latin1Bidi::Bidi
2800 );
2801 assert_ne!(
2802 check_str_for_latin1_and_bidi("abcdefghijklmnop\u{FE00}abcdefghijklmnop"),
2803 Latin1Bidi::Bidi
2804 );
2805 assert_ne!(
2806 check_str_for_latin1_and_bidi("abcdefghijklmnop\u{202C}abcdefghijklmnop"),
2807 Latin1Bidi::Bidi
2808 );
2809 assert_ne!(
2810 check_str_for_latin1_and_bidi("abcdefghijklmnop\u{FEFF}abcdefghijklmnop"),
2811 Latin1Bidi::Bidi
2812 );
2813 assert_eq!(
2814 check_str_for_latin1_and_bidi("abcdefghijklmnop\u{0590}abcdefghijklmnop"),
2815 Latin1Bidi::Bidi
2816 );
2817 assert_eq!(
2818 check_str_for_latin1_and_bidi("abcdefghijklmnop\u{08FF}abcdefghijklmnop"),
2819 Latin1Bidi::Bidi
2820 );
2821 assert_eq!(
2822 check_str_for_latin1_and_bidi("abcdefghijklmnop\u{061C}abcdefghijklmnop"),
2823 Latin1Bidi::Bidi
2824 );
2825 assert_eq!(
2826 check_str_for_latin1_and_bidi("abcdefghijklmnop\u{FB50}abcdefghijklmnop"),
2827 Latin1Bidi::Bidi
2828 );
2829 assert_eq!(
2830 check_str_for_latin1_and_bidi("abcdefghijklmnop\u{FDFF}abcdefghijklmnop"),
2831 Latin1Bidi::Bidi
2832 );
2833 assert_eq!(
2834 check_str_for_latin1_and_bidi("abcdefghijklmnop\u{FE70}abcdefghijklmnop"),
2835 Latin1Bidi::Bidi
2836 );
2837 assert_eq!(
2838 check_str_for_latin1_and_bidi("abcdefghijklmnop\u{FEFE}abcdefghijklmnop"),
2839 Latin1Bidi::Bidi
2840 );
2841 assert_eq!(
2842 check_str_for_latin1_and_bidi("abcdefghijklmnop\u{200F}abcdefghijklmnop"),
2843 Latin1Bidi::Bidi
2844 );
2845 assert_eq!(
2846 check_str_for_latin1_and_bidi("abcdefghijklmnop\u{202B}abcdefghijklmnop"),
2847 Latin1Bidi::Bidi
2848 );
2849 assert_eq!(
2850 check_str_for_latin1_and_bidi("abcdefghijklmnop\u{202E}abcdefghijklmnop"),
2851 Latin1Bidi::Bidi
2852 );
2853 assert_eq!(
2854 check_str_for_latin1_and_bidi("abcdefghijklmnop\u{2067}abcdefghijklmnop"),
2855 Latin1Bidi::Bidi
2856 );
2857 assert_eq!(
2858 check_str_for_latin1_and_bidi("abcdefghijklmnop\u{10800}abcdefghijklmnop"),
2859 Latin1Bidi::Bidi
2860 );
2861 assert_eq!(
2862 check_str_for_latin1_and_bidi("abcdefghijklmnop\u{10FFF}abcdefghijklmnop"),
2863 Latin1Bidi::Bidi
2864 );
2865 assert_eq!(
2866 check_str_for_latin1_and_bidi("abcdefghijklmnop\u{1E800}abcdefghijklmnop"),
2867 Latin1Bidi::Bidi
2868 );
2869 assert_eq!(
2870 check_str_for_latin1_and_bidi("abcdefghijklmnop\u{1EFFF}abcdefghijklmnop"),
2871 Latin1Bidi::Bidi
2872 );
2873 }
2874
/// Exercises `check_utf8_for_latin1_and_bidi` on UTF-8 inputs that place a
/// single probe character between two 16-byte runs of ASCII letters.
#[test]
fn test_check_utf8_for_latin1_and_bidi() {
    // Inputs whose result must be anything other than `Latin1Bidi::Bidi`.
    let not_bidi = [
        "abcdefghijklmnopaabcdefghijklmnop",
        "abcdefghijklmnop\u{03B1}abcdefghijklmnop",
        "abcdefghijklmnop\u{3041}abcdefghijklmnop",
        "abcdefghijklmnop\u{1F4A9}abcdefghijklmnop",
        "abcdefghijklmnop\u{FE00}abcdefghijklmnop",
        "abcdefghijklmnop\u{202C}abcdefghijklmnop",
        "abcdefghijklmnop\u{FEFF}abcdefghijklmnop",
    ];
    for text in not_bidi.iter() {
        assert_ne!(
            check_utf8_for_latin1_and_bidi(text.as_bytes()),
            Latin1Bidi::Bidi,
            "input: {:?}",
            text
        );
    }

    // Inputs whose result must be exactly `Latin1Bidi::Bidi`.
    let bidi = [
        "abcdefghijklmnop\u{0590}abcdefghijklmnop",
        "abcdefghijklmnop\u{08FF}abcdefghijklmnop",
        "abcdefghijklmnop\u{061C}abcdefghijklmnop",
        "abcdefghijklmnop\u{FB50}abcdefghijklmnop",
        "abcdefghijklmnop\u{FDFF}abcdefghijklmnop",
        "abcdefghijklmnop\u{FE70}abcdefghijklmnop",
        "abcdefghijklmnop\u{FEFE}abcdefghijklmnop",
        "abcdefghijklmnop\u{200F}abcdefghijklmnop",
        "abcdefghijklmnop\u{202B}abcdefghijklmnop",
        "abcdefghijklmnop\u{202E}abcdefghijklmnop",
        "abcdefghijklmnop\u{2067}abcdefghijklmnop",
        "abcdefghijklmnop\u{10800}abcdefghijklmnop",
        "abcdefghijklmnop\u{10FFF}abcdefghijklmnop",
        "abcdefghijklmnop\u{1E800}abcdefghijklmnop",
        "abcdefghijklmnop\u{1EFFF}abcdefghijklmnop",
    ];
    for text in bidi.iter() {
        assert_eq!(
            check_utf8_for_latin1_and_bidi(text.as_bytes()),
            Latin1Bidi::Bidi,
            "input: {:?}",
            text
        );
    }
}
2966
/// Exercises `check_utf16_for_latin1_and_bidi` on UTF-16 inputs that place a
/// single probe code unit between two 8-unit runs of ASCII letters.
#[test]
fn test_check_utf16_for_latin1_and_bidi() {
    /// Builds the 17-unit sample: `0x62..=0x69`, the probe, `0x62..=0x69`.
    fn surround(probe: u16) -> [u16; 17] {
        [
            0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, probe, 0x62, 0x63, 0x64, 0x65, 0x66,
            0x67, 0x68, 0x69,
        ]
    }

    // Probes whose result must be anything other than `Latin1Bidi::Bidi`.
    let not_bidi: [u16; 7] = [0x0062, 0x03B1, 0x3041, 0xD801, 0xFE00, 0x202C, 0xFEFF];
    for &probe in not_bidi.iter() {
        let sample = surround(probe);
        assert_ne!(
            check_utf16_for_latin1_and_bidi(&sample[..]),
            Latin1Bidi::Bidi,
            "probe: {:#06X}",
            probe
        );
    }

    // Probes whose result must be exactly `Latin1Bidi::Bidi`.
    let bidi: [u16; 16] = [
        0x0590, 0x08FF, 0x061C, 0xFB1D, 0xFB50, 0xFDFF, 0xFE70, 0xFEFE, 0x200F, 0x202B, 0x202E,
        0x2067, 0xD802, 0xD803, 0xD83A, 0xD83B,
    ];
    for &probe in bidi.iter() {
        let sample = surround(probe);
        assert_eq!(
            check_utf16_for_latin1_and_bidi(&sample[..]),
            Latin1Bidi::Bidi,
            "probe: {:#06X}",
            probe
        );
    }

    // A probe from the "must be Bidi" list immediately followed by one from
    // the "must not be Bidi" list: the result must still be `Bidi`.
    assert_eq!(
        check_utf16_for_latin1_and_bidi(&[
            0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x0590, 0x3041, 0x62, 0x63, 0x64,
            0x65, 0x66, 0x67, 0x68, 0x69,
        ]),
        Latin1Bidi::Bidi
    );
}
3139
/// Reference predicate that the exhaustive tests compare the real `char`-
/// and UTF-8-based bidi checks against.
///
/// Returns `true` exactly when `c` falls inside one of the inclusive
/// code point ranges listed in the table below.
#[inline(always)]
pub fn reference_is_char_bidi(c: char) -> bool {
    // Inclusive `(first, last)` code point ranges; single code points are
    // written as one-element ranges.
    const BIDI_RANGES: [(u32, u32); 9] = [
        (0x0590, 0x08FF),
        (0xFB1D, 0xFDFF),
        (0xFE70, 0xFEFE),
        (0x1_0800, 0x1_0FFF),
        (0x1_E800, 0x1_EFFF),
        (0x200F, 0x200F),
        (0x202B, 0x202B),
        (0x202E, 0x202E),
        (0x2067, 0x2067),
    ];
    let code_point = u32::from(c);
    BIDI_RANGES
        .iter()
        .any(|&(first, last)| first <= code_point && code_point <= last)
}
3155
/// Reference predicate that the exhaustive tests compare the real UTF-16
/// code-unit bidi checks against.
///
/// Returns `true` exactly when `u` falls inside one of the inclusive
/// code unit ranges listed in the table below.
#[inline(always)]
pub fn reference_is_utf16_code_unit_bidi(u: u16) -> bool {
    // Inclusive `(first, last)` code unit ranges; single values are written
    // as one-element ranges.
    const BIDI_UNITS: [(u16, u16); 11] = [
        (0x0590, 0x08FF),
        (0xFB1D, 0xFDFF),
        (0xFE70, 0xFEFE),
        (0xD802, 0xD802),
        (0xD803, 0xD803),
        (0xD83A, 0xD83A),
        (0xD83B, 0xD83B),
        (0x200F, 0x200F),
        (0x202B, 0x202B),
        (0x202E, 0x202E),
        (0x2067, 0x2067),
    ];
    BIDI_UNITS
        .iter()
        .any(|&(first, last)| first <= u && u <= last)
}
3173
/// Compares `is_char_bidi` with `reference_is_char_bidi` for every `char`.
///
/// Ignored under Miri (presumably because the exhaustive loop is too slow
/// there).
#[test]
#[cfg_attr(miri, ignore)]
fn test_is_char_bidi_thoroughly() {
    // Everything below the surrogate range, then everything above it.
    let scalar_values = (0..0xD800u32).chain(0xE000..0x11_0000u32);
    for c in scalar_values.map(|i| ::core::char::from_u32(i).unwrap()) {
        assert_eq!(is_char_bidi(c), reference_is_char_bidi(c));
    }
}
3186
/// Compares `is_utf16_code_unit_bidi` with
/// `reference_is_utf16_code_unit_bidi` for every possible `u16` value.
///
/// Ignored under Miri (presumably because the exhaustive loop is too slow
/// there).
#[test]
#[cfg_attr(miri, ignore)]
fn test_is_utf16_code_unit_bidi_thoroughly() {
    for unit in 0..=0xFFFFu16 {
        let expected = reference_is_utf16_code_unit_bidi(unit);
        assert_eq!(is_utf16_code_unit_bidi(unit), expected);
    }
}
3198
/// Compares `is_str_bidi`, applied to each `char` encoded as a one-character
/// string, with `reference_is_char_bidi`.
///
/// Ignored under Miri (presumably because the exhaustive loop is too slow
/// there).
#[test]
#[cfg_attr(miri, ignore)]
fn test_is_str_bidi_thoroughly() {
    // Four bytes is enough for the UTF-8 encoding of any single `char`.
    let mut scratch = [0u8; 4];
    // Everything below the surrogate range, then everything above it.
    for i in (0..0xD800u32).chain(0xE000..0x11_0000u32) {
        let c = ::core::char::from_u32(i).unwrap();
        let encoded: &str = c.encode_utf8(&mut scratch[..]);
        assert_eq!(is_str_bidi(encoded), reference_is_char_bidi(c));
    }
}
3218
/// Compares `is_utf8_bidi` with `reference_is_char_bidi` for every `char`,
/// first on the bare UTF-8 encoding and then on the same encoding padded
/// with zero bytes to a fixed 8-byte buffer.
///
/// Ignored under Miri (presumably because the exhaustive loop is too slow
/// there).
#[test]
#[cfg_attr(miri, ignore)]
fn test_is_utf8_bidi_thoroughly() {
    let mut buf = [0u8; 8];
    // Everything below the surrogate range, then everything above it.
    for i in (0..0xD800u32).chain(0xE000..0x11_0000u32) {
        let c = ::core::char::from_u32(i).unwrap();
        let expect = reference_is_char_bidi(c);

        // Exactly the encoded bytes of `c`.
        let len = c.encode_utf8(&mut buf[..]).len();
        assert_eq!(is_utf8_bidi(&buf[..len]), expect);

        // Clear whatever a previous, longer encoding left behind, then check
        // the whole buffer (encoding followed by zero bytes).
        for stale in buf[len..].iter_mut() {
            *stale = 0;
        }
        assert_eq!(is_utf8_bidi(&buf[..]), expect);
    }
}
3260
/// Compares `is_utf16_bidi` with `reference_is_utf16_code_unit_bidi` by
/// placing every possible `u16` value at index 15 of an otherwise zeroed
/// 32-unit buffer.
///
/// Ignored under Miri (presumably because the exhaustive loop is too slow
/// there).
#[test]
#[cfg_attr(miri, ignore)]
fn test_is_utf16_bidi_thoroughly() {
    let mut buf = [0u16; 32];
    for unit in 0..=0xFFFFu16 {
        buf[15] = unit;
        let expected = reference_is_utf16_code_unit_bidi(unit);
        assert_eq!(is_utf16_bidi(&buf[..]), expected);
    }
}
3274
/// Checks `is_utf8_bidi` on short inputs, including ones that end with a
/// lone `0xC2` lead byte, for which the expected result is `true`.
#[test]
fn test_is_utf8_bidi_edge_cases() {
    // (input bytes, expected result of `is_utf8_bidi`)
    let cases: [(&[u8], bool); 6] = [
        (&b"\xD5\xBF\x61"[..], false),
        (&b"\xD6\x80\x61"[..], false),
        (&b"abc"[..], false),
        (&b"\xD5\xBF\xC2"[..], true),
        (&b"\xD6\x80\xC2"[..], true),
        (&b"ab\xC2"[..], true),
    ];
    for &(input, expected) in cases.iter() {
        assert_eq!(is_utf8_bidi(input), expected, "input: {:?}", input);
    }
}
3284
/// Checks that `decode_latin1` borrows for ASCII-only input and produces the
/// expected string for input containing a non-ASCII byte.
#[test]
fn test_decode_latin1() {
    // ASCII-only input must come back as a borrow, not an allocation.
    if let Cow::Borrowed(s) = decode_latin1(b"ab") {
        assert_eq!(s, "ab");
    } else {
        unreachable!("Should have borrowed");
    }
    // Byte 0xE4 decodes to U+00E4.
    assert_eq!(decode_latin1(b"a\xE4"), "a\u{E4}");
}
3297
/// Checks that `encode_latin1_lossy` borrows for ASCII-only input and
/// produces the expected bytes for input containing U+00E4.
#[test]
fn test_encode_latin1_lossy() {
    // ASCII-only input must come back as a borrow, not an allocation.
    if let Cow::Borrowed(s) = encode_latin1_lossy("ab") {
        assert_eq!(s, b"ab");
    } else {
        unreachable!("Should have borrowed");
    }
    // U+00E4 encodes to the single byte 0xE4.
    assert_eq!(encode_latin1_lossy("a\u{E4}"), &(b"a\xE4")[..]);
}
3310
/// Runs `convert_utf8_to_utf16_without_replacement` over a sequence of
/// inputs that share one destination buffer, checking the return value and
/// the first three code units of the buffer after each call.
#[test]
fn test_convert_utf8_to_utf16_without_replacement() {
    let mut buf = [0u16; 5];

    // (source bytes, destination length, expected unit count, expected
    // `buf[..3]` afterwards). The buffer is deliberately NOT reset between
    // cases, so the order matters: units beyond the reported count are
    // expected to keep whatever an earlier case left there.
    let cases: [(&[u8], usize, usize, [u16; 3]); 7] = [
        (&b"ab"[..], 2, 2, [u16::from(b'a'), u16::from(b'b'), 0]),
        (&b"\xC3\xA4c"[..], 3, 2, [0xE4, u16::from(b'c'), 0]),
        (&b"\xE2\x98\x83"[..], 3, 1, [0x2603, u16::from(b'c'), 0]),
        (&b"\xE2\x98\x83d"[..], 4, 2, [0x2603, u16::from(b'd'), 0]),
        (&b"\xE2\x98\x83\xC3\xA4"[..], 5, 2, [0x2603, 0xE4, 0]),
        (&b"\xF0\x9F\x93\x8E"[..], 4, 2, [0xD83D, 0xDCCE, 0]),
        (&b"\xF0\x9F\x93\x8Ee"[..], 5, 3, [0xD83D, 0xDCCE, u16::from(b'e')]),
    ];
    for &(src, dst_len, written, head) in cases.iter() {
        assert_eq!(
            convert_utf8_to_utf16_without_replacement(src, &mut buf[..dst_len]),
            Some(written)
        );
        assert_eq!(&buf[..3], &head[..]);
    }

    // A four-byte sequence cut off after three bytes must yield `None`.
    assert_eq!(
        convert_utf8_to_utf16_without_replacement(b"\xF0\x9F\x93", &mut buf[..5]),
        None
    );
}
3368}