1#[cfg(all(
20 feature = "simd-accel",
21 any(
22 target_feature = "sse2",
23 all(target_endian = "little", target_arch = "aarch64"),
24 all(target_endian = "little", target_feature = "neon")
25 )
26))]
27use crate::simd_funcs::*;
28
29#[cfg(all(
30 feature = "simd-accel",
31 any(
32 target_feature = "sse2",
33 all(target_endian = "little", target_arch = "aarch64"),
34 all(target_endian = "little", target_feature = "neon")
35 )
36))]
37use core::simd::u16x8;
38
39use super::DecoderResult;
40use super::EncoderResult;
41use crate::ascii::*;
42use crate::utf_8::convert_utf8_to_utf16_up_to_invalid;
43use crate::utf_8::utf8_valid_up_to;
44
/// Result of asking a source or destination whether it can proceed.
///
/// The `check_*` methods in this file return `Available` with a handle when
/// the position is still within the underlying slice (with the required
/// headroom), and `Full` otherwise.
pub enum Space<T> {
    /// Work can proceed; carries the handle used to perform it.
    Available(T),
    /// No more room (or no more input); carries the number of units written
    /// (destinations) or consumed (sources) so far.
    Full(usize),
}
49
/// Outcome of a bulk ASCII copy.
pub enum CopyAsciiResult<T, U> {
    /// The copy cannot continue (the copied range was exhausted or there is
    /// not enough output space); carries the final status for the caller.
    Stop(T),
    /// A non-ASCII unit was encountered; carries that unit together with
    /// whatever the caller needs to keep going (a position or a handle).
    GoOn(U),
}
54
/// A decoded value that is not ASCII.
pub enum NonAscii {
    /// A single UTF-16 code unit outside the ASCII range. `read_enum` also
    /// uses this variant with `0xFFFD` for unpaired surrogates.
    BmpExclAscii(u16),
    /// A character assembled from a surrogate pair.
    Astral(char),
}
59
/// A value read from a UTF-16 source, split into ASCII and non-ASCII cases.
pub enum Unicode {
    /// A code unit below `0x80`, narrowed to a byte.
    Ascii(u8),
    /// Anything else; see [`NonAscii`].
    NonAscii(NonAscii),
}
64
/// Compile-time description of a UTF-16 byte order relative to the target.
pub trait Endian {
    /// `true` when this byte order differs from the byte order of the
    /// compilation target, i.e. when code units loaded natively have to be
    /// byte-swapped before use.
    const OPPOSITE_ENDIAN: bool;
}

/// Marker type for big-endian UTF-16.
pub struct BigEndian;

impl Endian for BigEndian {
    // Big-endian data is "opposite" exactly on little-endian targets.
    const OPPOSITE_ENDIAN: bool = cfg!(target_endian = "little");
}

/// Marker type for little-endian UTF-16.
pub struct LittleEndian;

impl Endian for LittleEndian {
    // Little-endian data is "opposite" exactly on big-endian targets.
    const OPPOSITE_ENDIAN: bool = cfg!(target_endian = "big");
}
90
91#[derive(Debug, Copy, Clone)]
92struct UnalignedU16Slice {
93 ptr: *const u8,
95 len: usize,
96}
97
98impl UnalignedU16Slice {
99 #[inline(always)]
101 pub unsafe fn new(ptr: *const u8, len: usize) -> UnalignedU16Slice {
102 UnalignedU16Slice { ptr, len }
104 }
105
106 #[inline(always)]
107 pub fn trim_last(&mut self) {
108 assert!(self.len > 0);
109 self.len -= 1;
111 }
112
113 #[inline(always)]
114 pub fn at(&self, i: usize) -> u16 {
115 use core::mem::MaybeUninit;
116
117 assert!(i < self.len);
118 unsafe {
119 let mut u: MaybeUninit<u16> = MaybeUninit::uninit();
120 ::core::ptr::copy_nonoverlapping(self.ptr.add(i * 2), u.as_mut_ptr() as *mut u8, 2);
122 u.assume_init()
124 }
125 }
126
127 #[cfg(feature = "simd-accel")]
128 #[inline(always)]
129 pub fn simd_at(&self, i: usize) -> u16x8 {
130 assert!(i + SIMD_STRIDE_SIZE / 2 <= self.len);
132 let byte_index = i * 2;
133 unsafe { to_u16_lanes(load16_unaligned(self.ptr.add(byte_index))) }
138 }
139
140 #[inline(always)]
141 pub fn len(&self) -> usize {
142 self.len
143 }
144
145 #[inline(always)]
146 pub fn tail(&self, from: usize) -> UnalignedU16Slice {
147 assert!(from <= self.len);
150 unsafe { UnalignedU16Slice::new(self.ptr.add(from * 2), self.len - from) }
152 }
153
154 #[cfg(feature = "simd-accel")]
155 #[inline(always)]
156 pub fn copy_bmp_to<E: Endian>(&self, other: &mut [u16]) -> Option<(u16, usize)> {
157 assert!(self.len <= other.len());
158 let mut offset = 0;
159 if SIMD_STRIDE_SIZE / 2 <= self.len {
162 let len_minus_stride = self.len - SIMD_STRIDE_SIZE / 2;
163 loop {
164 let mut simd = self.simd_at(offset);
165 if E::OPPOSITE_ENDIAN {
166 simd = simd_byte_swap(simd);
167 }
168 unsafe {
170 store8_unaligned(other.as_mut_ptr().add(offset), simd);
171 }
172 if contains_surrogates(simd) {
173 break;
174 }
175 offset += SIMD_STRIDE_SIZE / 2;
176 if offset > len_minus_stride {
178 break;
179 }
180 }
181 }
182 while offset < self.len {
183 let unit = swap_if_opposite_endian::<E>(self.at(offset));
184 other[offset] = unit;
185 if super::in_range16(unit, 0xD800, 0xE000) {
186 return Some((unit, offset));
187 }
188 offset += 1;
189 }
190 None
191 }
192
193 #[cfg(not(feature = "simd-accel"))]
194 #[inline(always)]
195 fn copy_bmp_to<E: Endian>(&self, other: &mut [u16]) -> Option<(u16, usize)> {
196 assert!(self.len <= other.len());
197 for (i, target) in other.iter_mut().enumerate().take(self.len) {
198 let unit = swap_if_opposite_endian::<E>(self.at(i));
199 *target = unit;
200 if super::in_range16(unit, 0xD800, 0xE000) {
201 return Some((unit, i));
202 }
203 }
204 None
205 }
206}
207
208#[inline(always)]
209fn copy_unaligned_basic_latin_to_ascii_alu<E: Endian>(
210 src: UnalignedU16Slice,
211 dst: &mut [u8],
212 offset: usize,
213) -> CopyAsciiResult<usize, (u16, usize)> {
214 let len = ::core::cmp::min(src.len(), dst.len());
215 let mut i = 0usize;
216 loop {
217 if i == len {
218 return CopyAsciiResult::Stop(i + offset);
219 }
220 let unit = swap_if_opposite_endian::<E>(src.at(i));
221 if unit > 0x7F {
222 return CopyAsciiResult::GoOn((unit, i + offset));
223 }
224 dst[i] = unit as u8;
225 i += 1;
226 }
227}
228
229#[inline(always)]
230fn swap_if_opposite_endian<E: Endian>(unit: u16) -> u16 {
231 if E::OPPOSITE_ENDIAN {
232 unit.swap_bytes()
233 } else {
234 unit
235 }
236}
237
/// Copies leading ASCII code units from `src` into `dst` (non-SIMD build).
///
/// Delegates to the scalar implementation with a zero offset, so reported
/// indices are relative to the start of `src`.
#[cfg(not(feature = "simd-accel"))]
#[inline(always)]
fn copy_unaligned_basic_latin_to_ascii<E: Endian>(
    src: UnalignedU16Slice,
    dst: &mut [u8],
) -> CopyAsciiResult<usize, (u16, usize)> {
    copy_unaligned_basic_latin_to_ascii_alu::<E>(src, dst, 0)
}
246
/// Copies leading ASCII code units from `src` into `dst` (SIMD build).
///
/// Whole strides of `SIMD_STRIDE_SIZE` code units are handled with two vector
/// loads each; whatever remains (including the stride in which a non-ASCII
/// unit was detected) is handed to the scalar implementation, which also
/// produces the return value with indices relative to the start of `src`.
#[cfg(feature = "simd-accel")]
#[inline(always)]
fn copy_unaligned_basic_latin_to_ascii<E: Endian>(
    src: UnalignedU16Slice,
    dst: &mut [u8],
) -> CopyAsciiResult<usize, (u16, usize)> {
    let len = ::core::cmp::min(src.len(), dst.len());
    let mut offset = 0;
    if SIMD_STRIDE_SIZE <= len {
        // Highest start index from which a full stride still fits in `len`.
        let len_minus_stride = len - SIMD_STRIDE_SIZE;
        loop {
            let mut first = src.simd_at(offset);
            let mut second = src.simd_at(offset + (SIMD_STRIDE_SIZE / 2));
            if E::OPPOSITE_ENDIAN {
                first = simd_byte_swap(first);
                second = simd_byte_swap(second);
            }
            // OR-ing the halves lets a single check cover the whole stride.
            if !simd_is_basic_latin(first | second) {
                // Nothing from this stride is stored; the scalar tail below
                // re-reads it to find the exact non-ASCII unit.
                break;
            }
            let packed = simd_pack(first, second);
            unsafe {
                // SAFETY: `offset <= len_minus_stride`, so `SIMD_STRIDE_SIZE`
                // bytes starting at `dst[offset]` lie within `dst`.
                store16_unaligned(dst.as_mut_ptr().add(offset), packed);
            }
            offset += SIMD_STRIDE_SIZE;
            if offset > len_minus_stride {
                break;
            }
        }
    }
    copy_unaligned_basic_latin_to_ascii_alu::<E>(src.tail(offset), &mut dst[offset..], offset)
}
283
/// Converts UTF-16 code units of byte order `E` from `src` into UTF-8 in `dst`.
///
/// Returns `(read, written, had_error)`: the number of code units consumed,
/// the number of bytes produced, and whether conversion stopped at an
/// unpaired surrogate. On error the offending surrogate is included in
/// `read` but nothing is written for it.
///
/// Conversion of a non-ASCII unit is only attempted while at least four
/// bytes of `dst` remain; if `dst` is shorter than four bytes to begin with,
/// nothing is converted and `(0, 0, false)` is returned.
#[inline(always)]
fn convert_unaligned_utf16_to_utf8<E: Endian>(
    src: UnalignedU16Slice,
    dst: &mut [u8],
) -> (usize, usize, bool) {
    if dst.len() < 4 {
        return (0, 0, false);
    }
    let mut src_pos = 0usize;
    let mut dst_pos = 0usize;
    let src_len = src.len();
    // `dst_pos >= dst_len_minus_three` means fewer than four bytes are left.
    let dst_len_minus_three = dst.len() - 3;
    'outer: loop {
        // Bulk-copy ASCII; stops at the first non-ASCII unit (not yet consumed).
        let mut non_ascii = match copy_unaligned_basic_latin_to_ascii::<E>(
            src.tail(src_pos),
            &mut dst[dst_pos..],
        ) {
            CopyAsciiResult::GoOn((unit, read_written)) => {
                src_pos += read_written;
                dst_pos += read_written;
                unit
            }
            CopyAsciiResult::Stop(read_written) => {
                return (src_pos + read_written, dst_pos + read_written, false);
            }
        };
        if dst_pos >= dst_len_minus_three {
            // Not enough room for the longest sequence; `non_ascii` stays unread.
            break 'outer;
        }
        // There is enough destination space, so commit to having read `non_ascii`.
        src_pos += 1;
        'inner: loop {
            // Wrapping subtraction maps the surrogate range to 0..=0x7FF so a
            // single comparison classifies the unit.
            let non_ascii_minus_surrogate_start = non_ascii.wrapping_sub(0xD800);
            if non_ascii_minus_surrogate_start > (0xDFFF - 0xD800) {
                // Not a surrogate: two- or three-byte sequence.
                if non_ascii < 0x800 {
                    dst[dst_pos] = ((non_ascii >> 6) | 0xC0) as u8;
                    dst_pos += 1;
                    dst[dst_pos] = ((non_ascii & 0x3F) | 0x80) as u8;
                    dst_pos += 1;
                } else {
                    dst[dst_pos] = ((non_ascii >> 12) | 0xE0) as u8;
                    dst_pos += 1;
                    dst[dst_pos] = (((non_ascii & 0xFC0) >> 6) | 0x80) as u8;
                    dst_pos += 1;
                    dst[dst_pos] = ((non_ascii & 0x3F) | 0x80) as u8;
                    dst_pos += 1;
                }
            } else if non_ascii_minus_surrogate_start <= (0xDBFF - 0xD800) {
                // High surrogate: needs a following low surrogate.
                if src_pos < src_len {
                    let second = swap_if_opposite_endian::<E>(src.at(src_pos));
                    let second_minus_low_surrogate_start = second.wrapping_sub(0xDC00);
                    if second_minus_low_surrogate_start <= (0xDFFF - 0xDC00) {
                        // The next unit is a low surrogate; consume it too.
                        src_pos += 1;
                        // Combine the pair into a scalar value above U+FFFF.
                        let point = (u32::from(non_ascii) << 10) + u32::from(second)
                            - (((0xD800u32 << 10) - 0x10000u32) + 0xDC00u32);

                        dst[dst_pos] = ((point >> 18) | 0xF0u32) as u8;
                        dst_pos += 1;
                        dst[dst_pos] = (((point & 0x3F000u32) >> 12) | 0x80u32) as u8;
                        dst_pos += 1;
                        dst[dst_pos] = (((point & 0xFC0u32) >> 6) | 0x80u32) as u8;
                        dst_pos += 1;
                        dst[dst_pos] = ((point & 0x3Fu32) | 0x80u32) as u8;
                        dst_pos += 1;
                    } else {
                        // The next unit is not a low surrogate; it is left
                        // unconsumed and the high surrogate is reported as unpaired.
                        return (src_pos, dst_pos, true);
                    }
                } else {
                    // High surrogate at the very end of the input.
                    return (src_pos, dst_pos, true);
                }
            } else {
                // Low surrogate without a preceding high surrogate.
                return (src_pos, dst_pos, true);
            }
            if dst_pos >= dst_len_minus_three || src_pos == src_len {
                break 'outer;
            }
            let unit = swap_if_opposite_endian::<E>(src.at(src_pos));
            src_pos += 1;
            if unit > 0x7F {
                // Still non-ASCII: stay in the per-unit loop.
                non_ascii = unit;
                continue 'inner;
            }
            // Back to ASCII: write this unit, then resume the bulk copy.
            dst[dst_pos] = unit as u8;
            dst_pos += 1;
            continue 'outer;
        }
    }
    (src_pos, dst_pos, false)
}
380
381pub struct ByteSource<'a> {
384 slice: &'a [u8],
385 pos: usize,
386}
387
388impl<'a> ByteSource<'a> {
389 #[inline(always)]
390 pub fn new(src: &[u8]) -> ByteSource {
391 ByteSource { slice: src, pos: 0 }
392 }
393 #[inline(always)]
394 pub fn check_available<'b>(&'b mut self) -> Space<ByteReadHandle<'b, 'a>> {
395 if self.pos < self.slice.len() {
396 Space::Available(ByteReadHandle::new(self))
397 } else {
398 Space::Full(self.consumed())
399 }
400 }
401 #[inline(always)]
402 fn read(&mut self) -> u8 {
403 let ret = self.slice[self.pos];
404 self.pos += 1;
405 ret
406 }
407 #[inline(always)]
408 fn unread(&mut self) -> usize {
409 self.pos -= 1;
410 self.pos
411 }
412 #[inline(always)]
413 pub fn consumed(&self) -> usize {
414 self.pos
415 }
416}
417
418pub struct ByteReadHandle<'a, 'b>
419where
420 'b: 'a,
421{
422 source: &'a mut ByteSource<'b>,
423}
424
425impl<'a, 'b> ByteReadHandle<'a, 'b>
426where
427 'b: 'a,
428{
429 #[inline(always)]
430 fn new(src: &'a mut ByteSource<'b>) -> ByteReadHandle<'a, 'b> {
431 ByteReadHandle { source: src }
432 }
433 #[inline(always)]
434 pub fn read(self) -> (u8, ByteUnreadHandle<'a, 'b>) {
435 let byte = self.source.read();
436 let handle = ByteUnreadHandle::new(self.source);
437 (byte, handle)
438 }
439 #[inline(always)]
440 pub fn consumed(&self) -> usize {
441 self.source.consumed()
442 }
443}
444
445pub struct ByteUnreadHandle<'a, 'b>
446where
447 'b: 'a,
448{
449 source: &'a mut ByteSource<'b>,
450}
451
452impl<'a, 'b> ByteUnreadHandle<'a, 'b>
453where
454 'b: 'a,
455{
456 #[inline(always)]
457 fn new(src: &'a mut ByteSource<'b>) -> ByteUnreadHandle<'a, 'b> {
458 ByteUnreadHandle { source: src }
459 }
460 #[inline(always)]
461 pub fn unread(self) -> usize {
462 self.source.unread()
463 }
464 #[inline(always)]
465 pub fn consumed(&self) -> usize {
466 self.source.consumed()
467 }
468 #[inline(always)]
469 pub fn commit(self) -> &'a mut ByteSource<'b> {
470 self.source
471 }
472}
473
474pub struct Utf16BmpHandle<'a, 'b>
477where
478 'b: 'a,
479{
480 dest: &'a mut Utf16Destination<'b>,
481}
482
483impl<'a, 'b> Utf16BmpHandle<'a, 'b>
484where
485 'b: 'a,
486{
487 #[inline(always)]
488 fn new(dst: &'a mut Utf16Destination<'b>) -> Utf16BmpHandle<'a, 'b> {
489 Utf16BmpHandle { dest: dst }
490 }
491 #[inline(always)]
492 pub fn written(&self) -> usize {
493 self.dest.written()
494 }
495 #[inline(always)]
496 pub fn write_ascii(self, ascii: u8) -> &'a mut Utf16Destination<'b> {
497 self.dest.write_ascii(ascii);
498 self.dest
499 }
500 #[inline(always)]
501 pub fn write_bmp(self, bmp: u16) -> &'a mut Utf16Destination<'b> {
502 self.dest.write_bmp(bmp);
503 self.dest
504 }
505 #[inline(always)]
506 pub fn write_bmp_excl_ascii(self, bmp: u16) -> &'a mut Utf16Destination<'b> {
507 self.dest.write_bmp_excl_ascii(bmp);
508 self.dest
509 }
510 #[inline(always)]
511 pub fn write_mid_bmp(self, bmp: u16) -> &'a mut Utf16Destination<'b> {
512 self.dest.write_mid_bmp(bmp);
513 self.dest
514 }
515 #[inline(always)]
516 pub fn write_upper_bmp(self, bmp: u16) -> &'a mut Utf16Destination<'b> {
517 self.dest.write_upper_bmp(bmp);
518 self.dest
519 }
520 #[inline(always)]
521 pub fn commit(self) -> &'a mut Utf16Destination<'b> {
522 self.dest
523 }
524}
525
526pub struct Utf16AstralHandle<'a, 'b>
527where
528 'b: 'a,
529{
530 dest: &'a mut Utf16Destination<'b>,
531}
532
533impl<'a, 'b> Utf16AstralHandle<'a, 'b>
534where
535 'b: 'a,
536{
537 #[inline(always)]
538 fn new(dst: &'a mut Utf16Destination<'b>) -> Utf16AstralHandle<'a, 'b> {
539 Utf16AstralHandle { dest: dst }
540 }
541 #[inline(always)]
542 pub fn written(&self) -> usize {
543 self.dest.written()
544 }
545 #[inline(always)]
546 pub fn write_ascii(self, ascii: u8) -> &'a mut Utf16Destination<'b> {
547 self.dest.write_ascii(ascii);
548 self.dest
549 }
550 #[inline(always)]
551 pub fn write_bmp(self, bmp: u16) -> &'a mut Utf16Destination<'b> {
552 self.dest.write_bmp(bmp);
553 self.dest
554 }
555 #[inline(always)]
556 pub fn write_bmp_excl_ascii(self, bmp: u16) -> &'a mut Utf16Destination<'b> {
557 self.dest.write_bmp_excl_ascii(bmp);
558 self.dest
559 }
560 #[inline(always)]
561 pub fn write_upper_bmp(self, bmp: u16) -> &'a mut Utf16Destination<'b> {
562 self.dest.write_upper_bmp(bmp);
563 self.dest
564 }
565 #[inline(always)]
566 pub fn write_astral(self, astral: u32) -> &'a mut Utf16Destination<'b> {
567 self.dest.write_astral(astral);
568 self.dest
569 }
570 #[inline(always)]
571 pub fn write_surrogate_pair(self, high: u16, low: u16) -> &'a mut Utf16Destination<'b> {
572 self.dest.write_surrogate_pair(high, low);
573 self.dest
574 }
575 #[inline(always)]
576 pub fn write_big5_combination(
577 self,
578 combined: u16,
579 combining: u16,
580 ) -> &'a mut Utf16Destination<'b> {
581 self.dest.write_big5_combination(combined, combining);
582 self.dest
583 }
584 #[inline(always)]
585 pub fn commit(self) -> &'a mut Utf16Destination<'b> {
586 self.dest
587 }
588}
589
590pub struct Utf16Destination<'a> {
591 slice: &'a mut [u16],
592 pos: usize,
593}
594
595impl<'a> Utf16Destination<'a> {
596 #[inline(always)]
597 pub fn new(dst: &mut [u16]) -> Utf16Destination {
598 Utf16Destination { slice: dst, pos: 0 }
599 }
600 #[inline(always)]
601 pub fn check_space_bmp<'b>(&'b mut self) -> Space<Utf16BmpHandle<'b, 'a>> {
602 if self.pos < self.slice.len() {
603 Space::Available(Utf16BmpHandle::new(self))
604 } else {
605 Space::Full(self.written())
606 }
607 }
608 #[inline(always)]
609 pub fn check_space_astral<'b>(&'b mut self) -> Space<Utf16AstralHandle<'b, 'a>> {
610 if self.pos + 1 < self.slice.len() {
611 Space::Available(Utf16AstralHandle::new(self))
612 } else {
613 Space::Full(self.written())
614 }
615 }
616 #[inline(always)]
617 pub fn written(&self) -> usize {
618 self.pos
619 }
620 #[inline(always)]
621 fn write_code_unit(&mut self, u: u16) {
622 unsafe {
623 *(self.slice.get_unchecked_mut(self.pos)) = u;
625 }
626 self.pos += 1;
627 }
628 #[inline(always)]
629 fn write_ascii(&mut self, ascii: u8) {
630 debug_assert!(ascii < 0x80);
631 self.write_code_unit(u16::from(ascii));
632 }
633 #[inline(always)]
634 fn write_bmp(&mut self, bmp: u16) {
635 self.write_code_unit(bmp);
636 }
637 #[inline(always)]
638 fn write_bmp_excl_ascii(&mut self, bmp: u16) {
639 debug_assert!(bmp >= 0x80);
640 self.write_code_unit(bmp);
641 }
642 #[inline(always)]
643 fn write_mid_bmp(&mut self, bmp: u16) {
644 debug_assert!(bmp >= 0x80); self.write_code_unit(bmp);
646 }
647 #[inline(always)]
648 fn write_upper_bmp(&mut self, bmp: u16) {
649 debug_assert!(bmp >= 0x80);
650 self.write_code_unit(bmp);
651 }
652 #[inline(always)]
653 fn write_astral(&mut self, astral: u32) {
654 debug_assert!(astral > 0xFFFF);
655 debug_assert!(astral <= 0x10_FFFF);
656 self.write_code_unit((0xD7C0 + (astral >> 10)) as u16);
657 self.write_code_unit((0xDC00 + (astral & 0x3FF)) as u16);
658 }
659 #[inline(always)]
660 fn write_surrogate_pair(&mut self, high: u16, low: u16) {
661 self.write_code_unit(high);
662 self.write_code_unit(low);
663 }
664 #[inline(always)]
665 fn write_big5_combination(&mut self, combined: u16, combining: u16) {
666 self.write_bmp_excl_ascii(combined);
667 self.write_bmp_excl_ascii(combining);
668 }
669 #[inline(always)]
671 pub fn copy_ascii_from_check_space_bmp<'b>(
672 &'b mut self,
673 source: &mut ByteSource,
674 ) -> CopyAsciiResult<(DecoderResult, usize, usize), (u8, Utf16BmpHandle<'b, 'a>)> {
675 let non_ascii_ret = {
676 let src_remaining = &source.slice[source.pos..];
677 let dst_remaining = &mut self.slice[self.pos..];
678 let (pending, length) = if dst_remaining.len() < src_remaining.len() {
679 (DecoderResult::OutputFull, dst_remaining.len())
680 } else {
681 (DecoderResult::InputEmpty, src_remaining.len())
682 };
683 match unsafe {
686 ascii_to_basic_latin(src_remaining.as_ptr(), dst_remaining.as_mut_ptr(), length)
687 } {
688 None => {
689 source.pos += length;
690 self.pos += length;
691 return CopyAsciiResult::Stop((pending, source.pos, self.pos));
692 }
693 Some((non_ascii, consumed)) => {
695 source.pos += consumed;
696 self.pos += consumed;
697 source.pos += 1; non_ascii
700 }
701 }
702 };
703 CopyAsciiResult::GoOn((non_ascii_ret, Utf16BmpHandle::new(self)))
705 }
706 #[inline(always)]
708 pub fn copy_ascii_from_check_space_astral<'b>(
709 &'b mut self,
710 source: &mut ByteSource,
711 ) -> CopyAsciiResult<(DecoderResult, usize, usize), (u8, Utf16AstralHandle<'b, 'a>)> {
712 let non_ascii_ret = {
713 let dst_len = self.slice.len();
714 let src_remaining = &source.slice[source.pos..];
715 let dst_remaining = &mut self.slice[self.pos..];
716 let (pending, length) = if dst_remaining.len() < src_remaining.len() {
717 (DecoderResult::OutputFull, dst_remaining.len())
718 } else {
719 (DecoderResult::InputEmpty, src_remaining.len())
720 };
721 match unsafe {
724 ascii_to_basic_latin(src_remaining.as_ptr(), dst_remaining.as_mut_ptr(), length)
725 } {
726 None => {
727 source.pos += length;
728 self.pos += length;
729 return CopyAsciiResult::Stop((pending, source.pos, self.pos));
730 }
731 Some((non_ascii, consumed)) => {
733 source.pos += consumed;
734 self.pos += consumed;
735 if self.pos + 1 < dst_len {
736 source.pos += 1; non_ascii
739 } else {
740 return CopyAsciiResult::Stop((
741 DecoderResult::OutputFull,
742 source.pos,
743 self.pos,
744 ));
745 }
746 }
747 }
748 };
749 CopyAsciiResult::GoOn((non_ascii_ret, Utf16AstralHandle::new(self)))
751 }
752 #[inline(always)]
753 pub fn copy_utf8_up_to_invalid_from(&mut self, source: &mut ByteSource) {
754 let src_remaining = &source.slice[source.pos..];
755 let dst_remaining = &mut self.slice[self.pos..];
756 let (read, written) = convert_utf8_to_utf16_up_to_invalid(src_remaining, dst_remaining);
757 source.pos += read;
758 self.pos += written;
759 }
760 #[inline(always)]
761 pub fn copy_utf16_from<E: Endian>(
762 &mut self,
763 source: &mut ByteSource,
764 ) -> Option<(usize, usize)> {
765 let src_remaining = &source.slice[source.pos..];
766 let dst_remaining = &mut self.slice[self.pos..];
767
768 let mut src_unaligned = unsafe {
769 UnalignedU16Slice::new(
770 src_remaining.as_ptr(),
771 ::core::cmp::min(src_remaining.len() / 2, dst_remaining.len()),
772 )
773 };
774 if src_unaligned.len() == 0 {
775 return None;
776 }
777 let last_unit = swap_if_opposite_endian::<E>(src_unaligned.at(src_unaligned.len() - 1));
778 if super::in_range16(last_unit, 0xD800, 0xDC00) {
779 src_unaligned.trim_last();
783 }
784 let mut offset = 0usize;
785 loop {
786 if let Some((surrogate, bmp_len)) = {
787 let src_left = src_unaligned.tail(offset);
788 let dst_left = &mut dst_remaining[offset..src_unaligned.len()];
789 src_left.copy_bmp_to::<E>(dst_left)
790 } {
791 offset += bmp_len; let second_pos = offset + 1;
793 if surrogate > 0xDBFF || second_pos == src_unaligned.len() {
794 source.pos += second_pos * 2;
796 self.pos += offset;
797 return Some((source.pos, self.pos));
798 }
799 let second = swap_if_opposite_endian::<E>(src_unaligned.at(second_pos));
800 if !super::in_range16(second, 0xDC00, 0xE000) {
801 source.pos += second_pos * 2;
803 self.pos += offset;
804 return Some((source.pos, self.pos));
805 }
806 dst_remaining[second_pos] = second;
808 offset += 2;
809 continue;
810 } else {
811 source.pos += src_unaligned.len() * 2;
812 self.pos += src_unaligned.len();
813 return None;
814 }
815 }
816 }
817}
818
819pub struct Utf8BmpHandle<'a, 'b>
822where
823 'b: 'a,
824{
825 dest: &'a mut Utf8Destination<'b>,
826}
827
828impl<'a, 'b> Utf8BmpHandle<'a, 'b>
829where
830 'b: 'a,
831{
832 #[inline(always)]
833 fn new(dst: &'a mut Utf8Destination<'b>) -> Utf8BmpHandle<'a, 'b> {
834 Utf8BmpHandle { dest: dst }
835 }
836 #[inline(always)]
837 pub fn written(&self) -> usize {
838 self.dest.written()
839 }
840 #[inline(always)]
841 pub fn write_ascii(self, ascii: u8) -> &'a mut Utf8Destination<'b> {
842 self.dest.write_ascii(ascii);
843 self.dest
844 }
845 #[inline(always)]
846 pub fn write_bmp(self, bmp: u16) -> &'a mut Utf8Destination<'b> {
847 self.dest.write_bmp(bmp);
848 self.dest
849 }
850 #[inline(always)]
851 pub fn write_bmp_excl_ascii(self, bmp: u16) -> &'a mut Utf8Destination<'b> {
852 self.dest.write_bmp_excl_ascii(bmp);
853 self.dest
854 }
855 #[inline(always)]
856 pub fn write_mid_bmp(self, bmp: u16) -> &'a mut Utf8Destination<'b> {
857 self.dest.write_mid_bmp(bmp);
858 self.dest
859 }
860 #[inline(always)]
861 pub fn write_upper_bmp(self, bmp: u16) -> &'a mut Utf8Destination<'b> {
862 self.dest.write_upper_bmp(bmp);
863 self.dest
864 }
865 #[inline(always)]
866 pub fn commit(self) -> &'a mut Utf8Destination<'b> {
867 self.dest
868 }
869}
870
871pub struct Utf8AstralHandle<'a, 'b>
872where
873 'b: 'a,
874{
875 dest: &'a mut Utf8Destination<'b>,
876}
877
878impl<'a, 'b> Utf8AstralHandle<'a, 'b>
879where
880 'b: 'a,
881{
882 #[inline(always)]
883 fn new(dst: &'a mut Utf8Destination<'b>) -> Utf8AstralHandle<'a, 'b> {
884 Utf8AstralHandle { dest: dst }
885 }
886 #[inline(always)]
887 pub fn written(&self) -> usize {
888 self.dest.written()
889 }
890 #[inline(always)]
891 pub fn write_ascii(self, ascii: u8) -> &'a mut Utf8Destination<'b> {
892 self.dest.write_ascii(ascii);
893 self.dest
894 }
895 #[inline(always)]
896 pub fn write_bmp(self, bmp: u16) -> &'a mut Utf8Destination<'b> {
897 self.dest.write_bmp(bmp);
898 self.dest
899 }
900 #[inline(always)]
901 pub fn write_bmp_excl_ascii(self, bmp: u16) -> &'a mut Utf8Destination<'b> {
902 self.dest.write_bmp_excl_ascii(bmp);
903 self.dest
904 }
905 #[inline(always)]
906 pub fn write_upper_bmp(self, bmp: u16) -> &'a mut Utf8Destination<'b> {
907 self.dest.write_upper_bmp(bmp);
908 self.dest
909 }
910 #[inline(always)]
911 pub fn write_astral(self, astral: u32) -> &'a mut Utf8Destination<'b> {
912 self.dest.write_astral(astral);
913 self.dest
914 }
915 #[inline(always)]
916 pub fn write_surrogate_pair(self, high: u16, low: u16) -> &'a mut Utf8Destination<'b> {
917 self.dest.write_surrogate_pair(high, low);
918 self.dest
919 }
920 #[inline(always)]
921 pub fn write_big5_combination(
922 self,
923 combined: u16,
924 combining: u16,
925 ) -> &'a mut Utf8Destination<'b> {
926 self.dest.write_big5_combination(combined, combining);
927 self.dest
928 }
929 #[inline(always)]
930 pub fn commit(self) -> &'a mut Utf8Destination<'b> {
931 self.dest
932 }
933}
934
935pub struct Utf8Destination<'a> {
936 slice: &'a mut [u8],
937 pos: usize,
938}
939
940impl<'a> Utf8Destination<'a> {
941 #[inline(always)]
942 pub fn new(dst: &mut [u8]) -> Utf8Destination {
943 Utf8Destination { slice: dst, pos: 0 }
944 }
945 #[inline(always)]
946 pub fn check_space_bmp<'b>(&'b mut self) -> Space<Utf8BmpHandle<'b, 'a>> {
947 if self.pos + 2 < self.slice.len() {
948 Space::Available(Utf8BmpHandle::new(self))
949 } else {
950 Space::Full(self.written())
951 }
952 }
953 #[inline(always)]
954 pub fn check_space_astral<'b>(&'b mut self) -> Space<Utf8AstralHandle<'b, 'a>> {
955 if self.pos + 3 < self.slice.len() {
956 Space::Available(Utf8AstralHandle::new(self))
957 } else {
958 Space::Full(self.written())
959 }
960 }
961 #[inline(always)]
962 pub fn written(&self) -> usize {
963 self.pos
964 }
965 #[inline(always)]
966 fn write_code_unit(&mut self, u: u8) {
967 unsafe {
968 *(self.slice.get_unchecked_mut(self.pos)) = u;
970 }
971 self.pos += 1;
972 }
973 #[inline(always)]
974 fn write_ascii(&mut self, ascii: u8) {
975 debug_assert!(ascii < 0x80);
976 self.write_code_unit(ascii);
977 }
978 #[inline(always)]
979 fn write_bmp(&mut self, bmp: u16) {
980 if bmp < 0x80u16 {
981 self.write_ascii(bmp as u8);
982 } else if bmp < 0x800u16 {
983 self.write_mid_bmp(bmp);
984 } else {
985 self.write_upper_bmp(bmp);
986 }
987 }
988 #[inline(always)]
989 fn write_mid_bmp(&mut self, mid_bmp: u16) {
990 debug_assert!(mid_bmp >= 0x80);
991 debug_assert!(mid_bmp < 0x800);
992 self.write_code_unit(((mid_bmp >> 6) | 0xC0) as u8);
993 self.write_code_unit(((mid_bmp & 0x3F) | 0x80) as u8);
994 }
995 #[inline(always)]
996 fn write_upper_bmp(&mut self, upper_bmp: u16) {
997 debug_assert!(upper_bmp >= 0x800);
998 self.write_code_unit(((upper_bmp >> 12) | 0xE0) as u8);
999 self.write_code_unit((((upper_bmp & 0xFC0) >> 6) | 0x80) as u8);
1000 self.write_code_unit(((upper_bmp & 0x3F) | 0x80) as u8);
1001 }
1002 #[inline(always)]
1003 fn write_bmp_excl_ascii(&mut self, bmp: u16) {
1004 if bmp < 0x800u16 {
1005 self.write_mid_bmp(bmp);
1006 } else {
1007 self.write_upper_bmp(bmp);
1008 }
1009 }
1010 #[inline(always)]
1011 fn write_astral(&mut self, astral: u32) {
1012 debug_assert!(astral > 0xFFFF);
1013 debug_assert!(astral <= 0x10_FFFF);
1014 self.write_code_unit(((astral >> 18) | 0xF0) as u8);
1015 self.write_code_unit((((astral & 0x3F000) >> 12) | 0x80) as u8);
1016 self.write_code_unit((((astral & 0xFC0) >> 6) | 0x80) as u8);
1017 self.write_code_unit(((astral & 0x3F) | 0x80) as u8);
1018 }
1019 #[inline(always)]
1020 pub fn write_surrogate_pair(&mut self, high: u16, low: u16) {
1021 self.write_astral(
1022 (u32::from(high) << 10) + u32::from(low)
1023 - (((0xD800u32 << 10) - 0x10000u32) + 0xDC00u32),
1024 );
1025 }
1026 #[inline(always)]
1027 fn write_big5_combination(&mut self, combined: u16, combining: u16) {
1028 self.write_mid_bmp(combined);
1029 self.write_mid_bmp(combining);
1030 }
1031 #[inline(always)]
1032 pub fn copy_ascii_from_check_space_bmp<'b>(
1033 &'b mut self,
1034 source: &mut ByteSource,
1035 ) -> CopyAsciiResult<(DecoderResult, usize, usize), (u8, Utf8BmpHandle<'b, 'a>)> {
1036 let non_ascii_ret = {
1037 let dst_len = self.slice.len();
1038 let src_remaining = &source.slice[source.pos..];
1039 let dst_remaining = &mut self.slice[self.pos..];
1040 let (pending, length) = if dst_remaining.len() < src_remaining.len() {
1041 (DecoderResult::OutputFull, dst_remaining.len())
1042 } else {
1043 (DecoderResult::InputEmpty, src_remaining.len())
1044 };
1045 match unsafe {
1046 ascii_to_ascii(src_remaining.as_ptr(), dst_remaining.as_mut_ptr(), length)
1047 } {
1048 None => {
1049 source.pos += length;
1050 self.pos += length;
1051 return CopyAsciiResult::Stop((pending, source.pos, self.pos));
1052 }
1053 Some((non_ascii, consumed)) => {
1054 source.pos += consumed;
1055 self.pos += consumed;
1056 if self.pos + 2 < dst_len {
1057 source.pos += 1; non_ascii
1059 } else {
1060 return CopyAsciiResult::Stop((
1061 DecoderResult::OutputFull,
1062 source.pos,
1063 self.pos,
1064 ));
1065 }
1066 }
1067 }
1068 };
1069 CopyAsciiResult::GoOn((non_ascii_ret, Utf8BmpHandle::new(self)))
1070 }
1071 #[inline(always)]
1072 pub fn copy_ascii_from_check_space_astral<'b>(
1073 &'b mut self,
1074 source: &mut ByteSource,
1075 ) -> CopyAsciiResult<(DecoderResult, usize, usize), (u8, Utf8AstralHandle<'b, 'a>)> {
1076 let non_ascii_ret = {
1077 let dst_len = self.slice.len();
1078 let src_remaining = &source.slice[source.pos..];
1079 let dst_remaining = &mut self.slice[self.pos..];
1080 let (pending, length) = if dst_remaining.len() < src_remaining.len() {
1081 (DecoderResult::OutputFull, dst_remaining.len())
1082 } else {
1083 (DecoderResult::InputEmpty, src_remaining.len())
1084 };
1085 match unsafe {
1086 ascii_to_ascii(src_remaining.as_ptr(), dst_remaining.as_mut_ptr(), length)
1087 } {
1088 None => {
1089 source.pos += length;
1090 self.pos += length;
1091 return CopyAsciiResult::Stop((pending, source.pos, self.pos));
1092 }
1093 Some((non_ascii, consumed)) => {
1094 source.pos += consumed;
1095 self.pos += consumed;
1096 if self.pos + 3 < dst_len {
1097 source.pos += 1; non_ascii
1099 } else {
1100 return CopyAsciiResult::Stop((
1101 DecoderResult::OutputFull,
1102 source.pos,
1103 self.pos,
1104 ));
1105 }
1106 }
1107 }
1108 };
1109 CopyAsciiResult::GoOn((non_ascii_ret, Utf8AstralHandle::new(self)))
1110 }
1111 #[inline(always)]
1112 pub fn copy_utf8_up_to_invalid_from(&mut self, source: &mut ByteSource) {
1113 let src_remaining = &source.slice[source.pos..];
1114 let dst_remaining = &mut self.slice[self.pos..];
1115 let min_len = ::core::cmp::min(src_remaining.len(), dst_remaining.len());
1116 let valid_len = utf8_valid_up_to(&src_remaining[..min_len]);
1119 (&mut dst_remaining[..valid_len]).copy_from_slice(&src_remaining[..valid_len]);
1120 source.pos += valid_len;
1121 self.pos += valid_len;
1122 }
1123 #[inline(always)]
1124 pub fn copy_utf16_from<E: Endian>(
1125 &mut self,
1126 source: &mut ByteSource,
1127 ) -> Option<(usize, usize)> {
1128 let src_remaining = &source.slice[source.pos..];
1129 let dst_remaining = &mut self.slice[self.pos..];
1130
1131 let mut src_unaligned =
1132 unsafe { UnalignedU16Slice::new(src_remaining.as_ptr(), src_remaining.len() / 2) };
1133 if src_unaligned.len() == 0 {
1134 return None;
1135 }
1136 let mut last_unit = src_unaligned.at(src_unaligned.len() - 1);
1137 if E::OPPOSITE_ENDIAN {
1138 last_unit = last_unit.swap_bytes();
1139 }
1140 if super::in_range16(last_unit, 0xD800, 0xDC00) {
1141 src_unaligned.trim_last();
1145 }
1146 let (read, written, had_error) =
1147 convert_unaligned_utf16_to_utf8::<E>(src_unaligned, dst_remaining);
1148 source.pos += read * 2;
1149 self.pos += written;
1150 if had_error {
1151 Some((source.pos, self.pos))
1152 } else {
1153 None
1154 }
1155 }
1156}
1157
/// Read cursor over a borrowed slice of UTF-16 code units.
pub struct Utf16Source<'a> {
    slice: &'a [u16],
    // Index of the next code unit to read.
    pos: usize,
    // Position before the most recent read, restored by `unread`.
    old_pos: usize,
}
1165
1166impl<'a> Utf16Source<'a> {
    /// Creates a source positioned at the start of `src`.
    #[inline(always)]
    pub fn new(src: &[u16]) -> Utf16Source {
        Utf16Source {
            slice: src,
            pos: 0,
            old_pos: 0,
        }
    }
1175 #[inline(always)]
1176 pub fn check_available<'b>(&'b mut self) -> Space<Utf16ReadHandle<'b, 'a>> {
1177 if self.pos < self.slice.len() {
1178 Space::Available(Utf16ReadHandle::new(self))
1179 } else {
1180 Space::Full(self.consumed())
1181 }
1182 }
1183 #[cfg_attr(feature = "cargo-clippy", allow(collapsible_if))]
1184 #[inline(always)]
1185 fn read(&mut self) -> char {
1186 self.old_pos = self.pos;
1187 let unit = self.slice[self.pos];
1188 self.pos += 1;
1189 let unit_minus_surrogate_start = unit.wrapping_sub(0xD800);
1190 if unit_minus_surrogate_start > (0xDFFF - 0xD800) {
1191 return unsafe { ::core::char::from_u32_unchecked(u32::from(unit)) };
1192 }
1193 if unit_minus_surrogate_start <= (0xDBFF - 0xD800) {
1194 if self.pos < self.slice.len() {
1196 let second = self.slice[self.pos];
1197 let second_minus_low_surrogate_start = second.wrapping_sub(0xDC00);
1198 if second_minus_low_surrogate_start <= (0xDFFF - 0xDC00) {
1199 self.pos += 1;
1201 return unsafe {
1202 ::core::char::from_u32_unchecked(
1203 (u32::from(unit) << 10) + u32::from(second)
1204 - (((0xD800u32 << 10) - 0x10000u32) + 0xDC00u32),
1205 )
1206 };
1207 }
1208 }
1212 }
1214 '\u{FFFD}'
1216 }
1217 #[cfg_attr(feature = "cargo-clippy", allow(collapsible_if))]
1218 #[inline(always)]
1219 fn read_enum(&mut self) -> Unicode {
1220 self.old_pos = self.pos;
1221 let unit = self.slice[self.pos];
1222 self.pos += 1;
1223 if unit < 0x80 {
1224 return Unicode::Ascii(unit as u8);
1225 }
1226 let unit_minus_surrogate_start = unit.wrapping_sub(0xD800);
1227 if unit_minus_surrogate_start > (0xDFFF - 0xD800) {
1228 return Unicode::NonAscii(NonAscii::BmpExclAscii(unit));
1229 }
1230 if unit_minus_surrogate_start <= (0xDBFF - 0xD800) {
1231 if self.pos < self.slice.len() {
1233 let second = self.slice[self.pos];
1234 let second_minus_low_surrogate_start = second.wrapping_sub(0xDC00);
1235 if second_minus_low_surrogate_start <= (0xDFFF - 0xDC00) {
1236 self.pos += 1;
1238 return Unicode::NonAscii(NonAscii::Astral(unsafe {
1239 ::core::char::from_u32_unchecked(
1240 (u32::from(unit) << 10) + u32::from(second)
1241 - (((0xD800u32 << 10) - 0x10000u32) + 0xDC00u32),
1242 )
1243 }));
1244 }
1245 }
1249 }
1251 Unicode::NonAscii(NonAscii::BmpExclAscii(0xFFFDu16))
1253 }
1254 #[inline(always)]
1255 fn unread(&mut self) -> usize {
1256 self.pos = self.old_pos;
1257 self.pos
1258 }
    /// Returns the current read position (`pos`), an index in code units
    /// into the source slice.
    #[inline(always)]
    pub fn consumed(&self) -> usize {
        self.pos
    }
1263 #[inline(always)]
1264 pub fn copy_ascii_to_check_space_two<'b>(
1265 &mut self,
1266 dest: &'b mut ByteDestination<'a>,
1267 ) -> CopyAsciiResult<(EncoderResult, usize, usize), (NonAscii, ByteTwoHandle<'b, 'a>)> {
1268 let non_ascii_ret = {
1269 let dst_len = dest.slice.len();
1270 let src_remaining = &self.slice[self.pos..];
1271 let dst_remaining = &mut dest.slice[dest.pos..];
1272 let (pending, length) = if dst_remaining.len() < src_remaining.len() {
1273 (EncoderResult::OutputFull, dst_remaining.len())
1274 } else {
1275 (EncoderResult::InputEmpty, src_remaining.len())
1276 };
1277 match unsafe {
1278 basic_latin_to_ascii(src_remaining.as_ptr(), dst_remaining.as_mut_ptr(), length)
1279 } {
1280 None => {
1281 self.pos += length;
1282 dest.pos += length;
1283 return CopyAsciiResult::Stop((pending, self.pos, dest.pos));
1284 }
1285 Some((non_ascii, consumed)) => {
1286 self.pos += consumed;
1287 dest.pos += consumed;
1288 if dest.pos + 1 < dst_len {
1289 self.pos += 1; let unit = non_ascii;
1291 let unit_minus_surrogate_start = unit.wrapping_sub(0xD800);
1292 if unit_minus_surrogate_start > (0xDFFF - 0xD800) {
1293 NonAscii::BmpExclAscii(unit)
1294 } else if unit_minus_surrogate_start <= (0xDBFF - 0xD800) {
1295 if self.pos < self.slice.len() {
1297 let second = self.slice[self.pos];
1298 let second_minus_low_surrogate_start = second.wrapping_sub(0xDC00);
1299 if second_minus_low_surrogate_start <= (0xDFFF - 0xDC00) {
1300 self.pos += 1;
1302 NonAscii::Astral(unsafe {
1303 ::core::char::from_u32_unchecked(
1304 (u32::from(unit) << 10) + u32::from(second)
1305 - (((0xD800u32 << 10) - 0x10000u32) + 0xDC00u32),
1306 )
1307 })
1308 } else {
1309 NonAscii::BmpExclAscii(0xFFFDu16)
1312 }
1313 } else {
1314 NonAscii::BmpExclAscii(0xFFFDu16)
1316 }
1317 } else {
1318 NonAscii::BmpExclAscii(0xFFFDu16)
1320 }
1321 } else {
1322 return CopyAsciiResult::Stop((
1323 EncoderResult::OutputFull,
1324 self.pos,
1325 dest.pos,
1326 ));
1327 }
1328 }
1329 }
1330 };
1331 CopyAsciiResult::GoOn((non_ascii_ret, ByteTwoHandle::new(dest)))
1332 }
1333 #[inline(always)]
1334 pub fn copy_ascii_to_check_space_four<'b>(
1335 &mut self,
1336 dest: &'b mut ByteDestination<'a>,
1337 ) -> CopyAsciiResult<(EncoderResult, usize, usize), (NonAscii, ByteFourHandle<'b, 'a>)> {
1338 let non_ascii_ret = {
1339 let dst_len = dest.slice.len();
1340 let src_remaining = &self.slice[self.pos..];
1341 let dst_remaining = &mut dest.slice[dest.pos..];
1342 let (pending, length) = if dst_remaining.len() < src_remaining.len() {
1343 (EncoderResult::OutputFull, dst_remaining.len())
1344 } else {
1345 (EncoderResult::InputEmpty, src_remaining.len())
1346 };
1347 match unsafe {
1348 basic_latin_to_ascii(src_remaining.as_ptr(), dst_remaining.as_mut_ptr(), length)
1349 } {
1350 None => {
1351 self.pos += length;
1352 dest.pos += length;
1353 return CopyAsciiResult::Stop((pending, self.pos, dest.pos));
1354 }
1355 Some((non_ascii, consumed)) => {
1356 self.pos += consumed;
1357 dest.pos += consumed;
1358 if dest.pos + 3 < dst_len {
1359 self.pos += 1; let unit = non_ascii;
1361 let unit_minus_surrogate_start = unit.wrapping_sub(0xD800);
1362 if unit_minus_surrogate_start > (0xDFFF - 0xD800) {
1363 NonAscii::BmpExclAscii(unit)
1364 } else if unit_minus_surrogate_start <= (0xDBFF - 0xD800) {
1365 if self.pos == self.slice.len() {
1367 NonAscii::BmpExclAscii(0xFFFDu16)
1369 } else {
1370 let second = self.slice[self.pos];
1371 let second_minus_low_surrogate_start = second.wrapping_sub(0xDC00);
1372 if second_minus_low_surrogate_start <= (0xDFFF - 0xDC00) {
1373 self.pos += 1;
1375 NonAscii::Astral(unsafe {
1376 ::core::char::from_u32_unchecked(
1377 (u32::from(unit) << 10) + u32::from(second)
1378 - (((0xD800u32 << 10) - 0x1_0000u32) + 0xDC00u32),
1379 )
1380 })
1381 } else {
1382 NonAscii::BmpExclAscii(0xFFFDu16)
1385 }
1386 }
1387 } else {
1388 NonAscii::BmpExclAscii(0xFFFDu16)
1390 }
1391 } else {
1392 return CopyAsciiResult::Stop((
1393 EncoderResult::OutputFull,
1394 self.pos,
1395 dest.pos,
1396 ));
1397 }
1398 }
1399 }
1400 };
1401 CopyAsciiResult::GoOn((non_ascii_ret, ByteFourHandle::new(dest)))
1402 }
1403}
1404
1405pub struct Utf16ReadHandle<'a, 'b>
1406where
1407 'b: 'a,
1408{
1409 source: &'a mut Utf16Source<'b>,
1410}
1411
1412impl<'a, 'b> Utf16ReadHandle<'a, 'b>
1413where
1414 'b: 'a,
1415{
1416 #[inline(always)]
1417 fn new(src: &'a mut Utf16Source<'b>) -> Utf16ReadHandle<'a, 'b> {
1418 Utf16ReadHandle { source: src }
1419 }
1420 #[inline(always)]
1421 pub fn read(self) -> (char, Utf16UnreadHandle<'a, 'b>) {
1422 let character = self.source.read();
1423 let handle = Utf16UnreadHandle::new(self.source);
1424 (character, handle)
1425 }
1426 #[inline(always)]
1427 pub fn read_enum(self) -> (Unicode, Utf16UnreadHandle<'a, 'b>) {
1428 let character = self.source.read_enum();
1429 let handle = Utf16UnreadHandle::new(self.source);
1430 (character, handle)
1431 }
1432 #[inline(always)]
1433 pub fn consumed(&self) -> usize {
1434 self.source.consumed()
1435 }
1436}
1437
1438pub struct Utf16UnreadHandle<'a, 'b>
1439where
1440 'b: 'a,
1441{
1442 source: &'a mut Utf16Source<'b>,
1443}
1444
1445impl<'a, 'b> Utf16UnreadHandle<'a, 'b>
1446where
1447 'b: 'a,
1448{
1449 #[inline(always)]
1450 fn new(src: &'a mut Utf16Source<'b>) -> Utf16UnreadHandle<'a, 'b> {
1451 Utf16UnreadHandle { source: src }
1452 }
1453 #[inline(always)]
1454 pub fn unread(self) -> usize {
1455 self.source.unread()
1456 }
1457 #[inline(always)]
1458 pub fn consumed(&self) -> usize {
1459 self.source.consumed()
1460 }
1461 #[inline(always)]
1462 pub fn commit(self) -> &'a mut Utf16Source<'b> {
1463 self.source
1464 }
1465}
1466
1467pub struct Utf8Source<'a> {
1470 slice: &'a [u8],
1471 pos: usize,
1472 old_pos: usize,
1473}
1474
1475impl<'a> Utf8Source<'a> {
1476 #[inline(always)]
1477 pub fn new(src: &str) -> Utf8Source {
1478 Utf8Source {
1479 slice: src.as_bytes(),
1480 pos: 0,
1481 old_pos: 0,
1482 }
1483 }
1484 #[inline(always)]
1485 pub fn check_available<'b>(&'b mut self) -> Space<Utf8ReadHandle<'b, 'a>> {
1486 if self.pos < self.slice.len() {
1487 Space::Available(Utf8ReadHandle::new(self))
1488 } else {
1489 Space::Full(self.consumed())
1490 }
1491 }
1492 #[inline(always)]
1493 fn read(&mut self) -> char {
1494 self.old_pos = self.pos;
1495 let unit = self.slice[self.pos];
1496 if unit < 0x80 {
1497 self.pos += 1;
1498 return char::from(unit);
1499 }
1500 if unit < 0xE0 {
1501 let point =
1502 ((u32::from(unit) & 0x1F) << 6) | (u32::from(self.slice[self.pos + 1]) & 0x3F);
1503 self.pos += 2;
1504 return unsafe { ::core::char::from_u32_unchecked(point) };
1505 }
1506 if unit < 0xF0 {
1507 let point = ((u32::from(unit) & 0xF) << 12)
1508 | ((u32::from(self.slice[self.pos + 1]) & 0x3F) << 6)
1509 | (u32::from(self.slice[self.pos + 2]) & 0x3F);
1510 self.pos += 3;
1511 return unsafe { ::core::char::from_u32_unchecked(point) };
1512 }
1513 let point = ((u32::from(unit) & 0x7) << 18)
1514 | ((u32::from(self.slice[self.pos + 1]) & 0x3F) << 12)
1515 | ((u32::from(self.slice[self.pos + 2]) & 0x3F) << 6)
1516 | (u32::from(self.slice[self.pos + 3]) & 0x3F);
1517 self.pos += 4;
1518 unsafe { ::core::char::from_u32_unchecked(point) }
1519 }
1520 #[inline(always)]
1521 fn read_enum(&mut self) -> Unicode {
1522 self.old_pos = self.pos;
1523 let unit = self.slice[self.pos];
1524 if unit < 0x80 {
1525 self.pos += 1;
1526 return Unicode::Ascii(unit);
1527 }
1528 if unit < 0xE0 {
1529 let point =
1530 ((u16::from(unit) & 0x1F) << 6) | (u16::from(self.slice[self.pos + 1]) & 0x3F);
1531 self.pos += 2;
1532 return Unicode::NonAscii(NonAscii::BmpExclAscii(point));
1533 }
1534 if unit < 0xF0 {
1535 let point = ((u16::from(unit) & 0xF) << 12)
1536 | ((u16::from(self.slice[self.pos + 1]) & 0x3F) << 6)
1537 | (u16::from(self.slice[self.pos + 2]) & 0x3F);
1538 self.pos += 3;
1539 return Unicode::NonAscii(NonAscii::BmpExclAscii(point));
1540 }
1541 let point = ((u32::from(unit) & 0x7) << 18)
1542 | ((u32::from(self.slice[self.pos + 1]) & 0x3F) << 12)
1543 | ((u32::from(self.slice[self.pos + 2]) & 0x3F) << 6)
1544 | (u32::from(self.slice[self.pos + 3]) & 0x3F);
1545 self.pos += 4;
1546 Unicode::NonAscii(NonAscii::Astral(unsafe {
1547 ::core::char::from_u32_unchecked(point)
1548 }))
1549 }
1550 #[inline(always)]
1551 fn unread(&mut self) -> usize {
1552 self.pos = self.old_pos;
1553 self.pos
1554 }
1555 #[inline(always)]
1556 pub fn consumed(&self) -> usize {
1557 self.pos
1558 }
1559 #[inline(always)]
1560 pub fn copy_ascii_to_check_space_one<'b>(
1561 &mut self,
1562 dest: &'b mut ByteDestination<'a>,
1563 ) -> CopyAsciiResult<(EncoderResult, usize, usize), (NonAscii, ByteOneHandle<'b, 'a>)> {
1564 let non_ascii_ret = {
1565 let src_remaining = &self.slice[self.pos..];
1566 let dst_remaining = &mut dest.slice[dest.pos..];
1567 let (pending, length) = if dst_remaining.len() < src_remaining.len() {
1568 (EncoderResult::OutputFull, dst_remaining.len())
1569 } else {
1570 (EncoderResult::InputEmpty, src_remaining.len())
1571 };
1572 match unsafe {
1573 ascii_to_ascii(src_remaining.as_ptr(), dst_remaining.as_mut_ptr(), length)
1574 } {
1575 None => {
1576 self.pos += length;
1577 dest.pos += length;
1578 return CopyAsciiResult::Stop((pending, self.pos, dest.pos));
1579 }
1580 Some((non_ascii, consumed)) => {
1581 self.pos += consumed;
1582 dest.pos += consumed;
1583 if non_ascii < 0xE0 {
1586 let point = ((u16::from(non_ascii) & 0x1F) << 6)
1587 | (u16::from(self.slice[self.pos + 1]) & 0x3F);
1588 self.pos += 2;
1589 NonAscii::BmpExclAscii(point)
1590 } else if non_ascii < 0xF0 {
1591 let point = ((u16::from(non_ascii) & 0xF) << 12)
1592 | ((u16::from(self.slice[self.pos + 1]) & 0x3F) << 6)
1593 | (u16::from(self.slice[self.pos + 2]) & 0x3F);
1594 self.pos += 3;
1595 NonAscii::BmpExclAscii(point)
1596 } else {
1597 let point = ((u32::from(non_ascii) & 0x7) << 18)
1598 | ((u32::from(self.slice[self.pos + 1]) & 0x3F) << 12)
1599 | ((u32::from(self.slice[self.pos + 2]) & 0x3F) << 6)
1600 | (u32::from(self.slice[self.pos + 3]) & 0x3F);
1601 self.pos += 4;
1602 NonAscii::Astral(unsafe { ::core::char::from_u32_unchecked(point) })
1603 }
1604 }
1605 }
1606 };
1607 CopyAsciiResult::GoOn((non_ascii_ret, ByteOneHandle::new(dest)))
1608 }
1609 #[inline(always)]
1610 pub fn copy_ascii_to_check_space_two<'b>(
1611 &mut self,
1612 dest: &'b mut ByteDestination<'a>,
1613 ) -> CopyAsciiResult<(EncoderResult, usize, usize), (NonAscii, ByteTwoHandle<'b, 'a>)> {
1614 let non_ascii_ret = {
1615 let dst_len = dest.slice.len();
1616 let src_remaining = &self.slice[self.pos..];
1617 let dst_remaining = &mut dest.slice[dest.pos..];
1618 let (pending, length) = if dst_remaining.len() < src_remaining.len() {
1619 (EncoderResult::OutputFull, dst_remaining.len())
1620 } else {
1621 (EncoderResult::InputEmpty, src_remaining.len())
1622 };
1623 match unsafe {
1624 ascii_to_ascii(src_remaining.as_ptr(), dst_remaining.as_mut_ptr(), length)
1625 } {
1626 None => {
1627 self.pos += length;
1628 dest.pos += length;
1629 return CopyAsciiResult::Stop((pending, self.pos, dest.pos));
1630 }
1631 Some((non_ascii, consumed)) => {
1632 self.pos += consumed;
1633 dest.pos += consumed;
1634 if dest.pos + 1 < dst_len {
1635 if non_ascii < 0xE0 {
1636 let point = ((u16::from(non_ascii) & 0x1F) << 6)
1637 | (u16::from(self.slice[self.pos + 1]) & 0x3F);
1638 self.pos += 2;
1639 NonAscii::BmpExclAscii(point)
1640 } else if non_ascii < 0xF0 {
1641 let point = ((u16::from(non_ascii) & 0xF) << 12)
1642 | ((u16::from(self.slice[self.pos + 1]) & 0x3F) << 6)
1643 | (u16::from(self.slice[self.pos + 2]) & 0x3F);
1644 self.pos += 3;
1645 NonAscii::BmpExclAscii(point)
1646 } else {
1647 let point = ((u32::from(non_ascii) & 0x7) << 18)
1648 | ((u32::from(self.slice[self.pos + 1]) & 0x3F) << 12)
1649 | ((u32::from(self.slice[self.pos + 2]) & 0x3F) << 6)
1650 | (u32::from(self.slice[self.pos + 3]) & 0x3F);
1651 self.pos += 4;
1652 NonAscii::Astral(unsafe { ::core::char::from_u32_unchecked(point) })
1653 }
1654 } else {
1655 return CopyAsciiResult::Stop((
1656 EncoderResult::OutputFull,
1657 self.pos,
1658 dest.pos,
1659 ));
1660 }
1661 }
1662 }
1663 };
1664 CopyAsciiResult::GoOn((non_ascii_ret, ByteTwoHandle::new(dest)))
1665 }
1666 #[inline(always)]
1667 pub fn copy_ascii_to_check_space_four<'b>(
1668 &mut self,
1669 dest: &'b mut ByteDestination<'a>,
1670 ) -> CopyAsciiResult<(EncoderResult, usize, usize), (NonAscii, ByteFourHandle<'b, 'a>)> {
1671 let non_ascii_ret = {
1672 let dst_len = dest.slice.len();
1673 let src_remaining = &self.slice[self.pos..];
1674 let dst_remaining = &mut dest.slice[dest.pos..];
1675 let (pending, length) = if dst_remaining.len() < src_remaining.len() {
1676 (EncoderResult::OutputFull, dst_remaining.len())
1677 } else {
1678 (EncoderResult::InputEmpty, src_remaining.len())
1679 };
1680 match unsafe {
1681 ascii_to_ascii(src_remaining.as_ptr(), dst_remaining.as_mut_ptr(), length)
1682 } {
1683 None => {
1684 self.pos += length;
1685 dest.pos += length;
1686 return CopyAsciiResult::Stop((pending, self.pos, dest.pos));
1687 }
1688 Some((non_ascii, consumed)) => {
1689 self.pos += consumed;
1690 dest.pos += consumed;
1691 if dest.pos + 3 < dst_len {
1692 if non_ascii < 0xE0 {
1693 let point = ((u16::from(non_ascii) & 0x1F) << 6)
1694 | (u16::from(self.slice[self.pos + 1]) & 0x3F);
1695 self.pos += 2;
1696 NonAscii::BmpExclAscii(point)
1697 } else if non_ascii < 0xF0 {
1698 let point = ((u16::from(non_ascii) & 0xF) << 12)
1699 | ((u16::from(self.slice[self.pos + 1]) & 0x3F) << 6)
1700 | (u16::from(self.slice[self.pos + 2]) & 0x3F);
1701 self.pos += 3;
1702 NonAscii::BmpExclAscii(point)
1703 } else {
1704 let point = ((u32::from(non_ascii) & 0x7) << 18)
1705 | ((u32::from(self.slice[self.pos + 1]) & 0x3F) << 12)
1706 | ((u32::from(self.slice[self.pos + 2]) & 0x3F) << 6)
1707 | (u32::from(self.slice[self.pos + 3]) & 0x3F);
1708 self.pos += 4;
1709 NonAscii::Astral(unsafe { ::core::char::from_u32_unchecked(point) })
1710 }
1711 } else {
1712 return CopyAsciiResult::Stop((
1713 EncoderResult::OutputFull,
1714 self.pos,
1715 dest.pos,
1716 ));
1717 }
1718 }
1719 }
1720 };
1721 CopyAsciiResult::GoOn((non_ascii_ret, ByteFourHandle::new(dest)))
1722 }
1723}
1724
1725pub struct Utf8ReadHandle<'a, 'b>
1726where
1727 'b: 'a,
1728{
1729 source: &'a mut Utf8Source<'b>,
1730}
1731
1732impl<'a, 'b> Utf8ReadHandle<'a, 'b>
1733where
1734 'b: 'a,
1735{
1736 #[inline(always)]
1737 fn new(src: &'a mut Utf8Source<'b>) -> Utf8ReadHandle<'a, 'b> {
1738 Utf8ReadHandle { source: src }
1739 }
1740 #[inline(always)]
1741 pub fn read(self) -> (char, Utf8UnreadHandle<'a, 'b>) {
1742 let character = self.source.read();
1743 let handle = Utf8UnreadHandle::new(self.source);
1744 (character, handle)
1745 }
1746 #[inline(always)]
1747 pub fn read_enum(self) -> (Unicode, Utf8UnreadHandle<'a, 'b>) {
1748 let character = self.source.read_enum();
1749 let handle = Utf8UnreadHandle::new(self.source);
1750 (character, handle)
1751 }
1752 #[inline(always)]
1753 pub fn consumed(&self) -> usize {
1754 self.source.consumed()
1755 }
1756}
1757
1758pub struct Utf8UnreadHandle<'a, 'b>
1759where
1760 'b: 'a,
1761{
1762 source: &'a mut Utf8Source<'b>,
1763}
1764
1765impl<'a, 'b> Utf8UnreadHandle<'a, 'b>
1766where
1767 'b: 'a,
1768{
1769 #[inline(always)]
1770 fn new(src: &'a mut Utf8Source<'b>) -> Utf8UnreadHandle<'a, 'b> {
1771 Utf8UnreadHandle { source: src }
1772 }
1773 #[inline(always)]
1774 pub fn unread(self) -> usize {
1775 self.source.unread()
1776 }
1777 #[inline(always)]
1778 pub fn consumed(&self) -> usize {
1779 self.source.consumed()
1780 }
1781 #[inline(always)]
1782 pub fn commit(self) -> &'a mut Utf8Source<'b> {
1783 self.source
1784 }
1785}
1786
1787pub struct ByteOneHandle<'a, 'b>
1790where
1791 'b: 'a,
1792{
1793 dest: &'a mut ByteDestination<'b>,
1794}
1795
1796impl<'a, 'b> ByteOneHandle<'a, 'b>
1797where
1798 'b: 'a,
1799{
1800 #[inline(always)]
1801 fn new(dst: &'a mut ByteDestination<'b>) -> ByteOneHandle<'a, 'b> {
1802 ByteOneHandle { dest: dst }
1803 }
1804 #[inline(always)]
1805 pub fn written(&self) -> usize {
1806 self.dest.written()
1807 }
1808 #[inline(always)]
1809 pub fn write_one(self, first: u8) -> &'a mut ByteDestination<'b> {
1810 self.dest.write_one(first);
1811 self.dest
1812 }
1813}
1814
1815pub struct ByteTwoHandle<'a, 'b>
1816where
1817 'b: 'a,
1818{
1819 dest: &'a mut ByteDestination<'b>,
1820}
1821
1822impl<'a, 'b> ByteTwoHandle<'a, 'b>
1823where
1824 'b: 'a,
1825{
1826 #[inline(always)]
1827 fn new(dst: &'a mut ByteDestination<'b>) -> ByteTwoHandle<'a, 'b> {
1828 ByteTwoHandle { dest: dst }
1829 }
1830 #[inline(always)]
1831 pub fn written(&self) -> usize {
1832 self.dest.written()
1833 }
1834 #[inline(always)]
1835 pub fn write_one(self, first: u8) -> &'a mut ByteDestination<'b> {
1836 self.dest.write_one(first);
1837 self.dest
1838 }
1839 #[inline(always)]
1840 pub fn write_two(self, first: u8, second: u8) -> &'a mut ByteDestination<'b> {
1841 self.dest.write_two(first, second);
1842 self.dest
1843 }
1844}
1845
1846pub struct ByteThreeHandle<'a, 'b>
1847where
1848 'b: 'a,
1849{
1850 dest: &'a mut ByteDestination<'b>,
1851}
1852
1853impl<'a, 'b> ByteThreeHandle<'a, 'b>
1854where
1855 'b: 'a,
1856{
1857 #[inline(always)]
1858 fn new(dst: &'a mut ByteDestination<'b>) -> ByteThreeHandle<'a, 'b> {
1859 ByteThreeHandle { dest: dst }
1860 }
1861 #[inline(always)]
1862 pub fn written(&self) -> usize {
1863 self.dest.written()
1864 }
1865 #[inline(always)]
1866 pub fn write_one(self, first: u8) -> &'a mut ByteDestination<'b> {
1867 self.dest.write_one(first);
1868 self.dest
1869 }
1870 #[inline(always)]
1871 pub fn write_two(self, first: u8, second: u8) -> &'a mut ByteDestination<'b> {
1872 self.dest.write_two(first, second);
1873 self.dest
1874 }
1875 #[inline(always)]
1876 pub fn write_three(self, first: u8, second: u8, third: u8) -> &'a mut ByteDestination<'b> {
1877 self.dest.write_three(first, second, third);
1878 self.dest
1879 }
1880 #[inline(always)]
1881 pub fn write_three_return_written(self, first: u8, second: u8, third: u8) -> usize {
1882 self.dest.write_three(first, second, third);
1883 self.dest.written()
1884 }
1885}
1886
1887pub struct ByteFourHandle<'a, 'b>
1888where
1889 'b: 'a,
1890{
1891 dest: &'a mut ByteDestination<'b>,
1892}
1893
1894impl<'a, 'b> ByteFourHandle<'a, 'b>
1895where
1896 'b: 'a,
1897{
1898 #[inline(always)]
1899 fn new(dst: &'a mut ByteDestination<'b>) -> ByteFourHandle<'a, 'b> {
1900 ByteFourHandle { dest: dst }
1901 }
1902 #[inline(always)]
1903 pub fn written(&self) -> usize {
1904 self.dest.written()
1905 }
1906 #[inline(always)]
1907 pub fn write_one(self, first: u8) -> &'a mut ByteDestination<'b> {
1908 self.dest.write_one(first);
1909 self.dest
1910 }
1911 #[inline(always)]
1912 pub fn write_two(self, first: u8, second: u8) -> &'a mut ByteDestination<'b> {
1913 self.dest.write_two(first, second);
1914 self.dest
1915 }
1916 #[inline(always)]
1917 pub fn write_four(
1918 self,
1919 first: u8,
1920 second: u8,
1921 third: u8,
1922 fourth: u8,
1923 ) -> &'a mut ByteDestination<'b> {
1924 self.dest.write_four(first, second, third, fourth);
1925 self.dest
1926 }
1927}
1928
1929pub struct ByteDestination<'a> {
1930 slice: &'a mut [u8],
1931 pos: usize,
1932}
1933
1934impl<'a> ByteDestination<'a> {
1935 #[inline(always)]
1936 pub fn new(dst: &mut [u8]) -> ByteDestination {
1937 ByteDestination { slice: dst, pos: 0 }
1938 }
1939 #[inline(always)]
1940 pub fn check_space_one<'b>(&'b mut self) -> Space<ByteOneHandle<'b, 'a>> {
1941 if self.pos < self.slice.len() {
1942 Space::Available(ByteOneHandle::new(self))
1943 } else {
1944 Space::Full(self.written())
1945 }
1946 }
1947 #[inline(always)]
1948 pub fn check_space_two<'b>(&'b mut self) -> Space<ByteTwoHandle<'b, 'a>> {
1949 if self.pos + 1 < self.slice.len() {
1950 Space::Available(ByteTwoHandle::new(self))
1951 } else {
1952 Space::Full(self.written())
1953 }
1954 }
1955 #[inline(always)]
1956 pub fn check_space_three<'b>(&'b mut self) -> Space<ByteThreeHandle<'b, 'a>> {
1957 if self.pos + 2 < self.slice.len() {
1958 Space::Available(ByteThreeHandle::new(self))
1959 } else {
1960 Space::Full(self.written())
1961 }
1962 }
1963 #[inline(always)]
1964 pub fn check_space_four<'b>(&'b mut self) -> Space<ByteFourHandle<'b, 'a>> {
1965 if self.pos + 3 < self.slice.len() {
1966 Space::Available(ByteFourHandle::new(self))
1967 } else {
1968 Space::Full(self.written())
1969 }
1970 }
1971 #[inline(always)]
1972 pub fn written(&self) -> usize {
1973 self.pos
1974 }
1975 #[inline(always)]
1976 fn write_one(&mut self, first: u8) {
1977 self.slice[self.pos] = first;
1978 self.pos += 1;
1979 }
1980 #[inline(always)]
1981 fn write_two(&mut self, first: u8, second: u8) {
1982 self.slice[self.pos] = first;
1983 self.slice[self.pos + 1] = second;
1984 self.pos += 2;
1985 }
1986 #[inline(always)]
1987 fn write_three(&mut self, first: u8, second: u8, third: u8) {
1988 self.slice[self.pos] = first;
1989 self.slice[self.pos + 1] = second;
1990 self.slice[self.pos + 2] = third;
1991 self.pos += 3;
1992 }
1993 #[inline(always)]
1994 fn write_four(&mut self, first: u8, second: u8, third: u8, fourth: u8) {
1995 self.slice[self.pos] = first;
1996 self.slice[self.pos + 1] = second;
1997 self.slice[self.pos + 2] = third;
1998 self.slice[self.pos + 3] = fourth;
1999 self.pos += 4;
2000 }
2001}