httparse/simd/
avx2.rs

1use crate::iter::Bytes;
2
3#[inline]
4#[target_feature(enable = "avx2")]
5pub unsafe fn match_uri_vectored(bytes: &mut Bytes) {
6    while bytes.as_ref().len() >= 32 {
7
8        let advance = match_url_char_32_avx(bytes.as_ref());
9
10        bytes.advance(advance);
11
12        if advance != 32 {
13            return;
14        }
15    }
16    // NOTE: use SWAR for <32B, more efficient than falling back to SSE4.2
17    super::swar::match_uri_vectored(bytes)
18}
19
/// Returns how many bytes at the start of the first 32 bytes of `buf` are
/// accepted as URI bytes, i.e. the index of the first rejected byte, or 32
/// when all of them pass.
///
/// Accepted bytes are `%x21-%x7e` and `%x80-%xff`.
///
/// # Safety
///
/// `buf` must hold at least 32 bytes (a full 256-bit load is performed) and
/// the running CPU must support AVX2.
#[inline(always)]
#[allow(non_snake_case, overflowing_literals)]
#[allow(unused)]
unsafe fn match_url_char_32_avx(buf: &[u8]) -> usize {
    #[cfg(target_arch = "x86")]
    use core::arch::x86::*;
    #[cfg(target_arch = "x86_64")]
    use core::arch::x86_64::*;

    debug_assert!(buf.len() >= 32);

    // Unaligned load of the 32 bytes under inspection.
    let chunk = _mm256_lddqu_si256(buf.as_ptr() as *const __m256i);

    // %x21-%x7e %x80-%xff
    let FLOOR: __m256i = _mm256_set1_epi8(0x21);
    let DELETE: __m256i = _mm256_set1_epi8(0x7f);

    // Unsigned `chunk >= FLOOR`: a lane qualifies when max(lane, FLOOR) is the lane itself.
    let at_least_floor = _mm256_cmpeq_epi8(chunk, _mm256_max_epu8(FLOOR, chunk));
    let is_delete = _mm256_cmpeq_epi8(chunk, DELETE);

    // andnot computes `!is_delete & at_least_floor`.
    let accepted = _mm256_andnot_si256(is_delete, at_least_floor);

    // One bit per lane; bit 0 corresponds to the first byte of the chunk.
    let mask = _mm256_movemask_epi8(accepted) as u32;

    // TODO: use .trailing_ones() once MSRV >= 1.46
    let rejected = !mask;
    rejected.trailing_zeros() as usize
}
46
47#[target_feature(enable = "avx2")]
48pub unsafe fn match_header_value_vectored(bytes: &mut Bytes) {
49    while bytes.as_ref().len() >= 32 {
50        let advance = match_header_value_char_32_avx(bytes.as_ref());
51        bytes.advance(advance);
52
53        if advance != 32 {
54            return;
55        }
56    }
57    // NOTE: use SWAR for <32B, more efficient than falling back to SSE4.2
58    super::swar::match_header_value_vectored(bytes)
59}
60
/// Returns how many bytes at the start of the first 32 bytes of `buf` are
/// accepted as header-value bytes, i.e. the index of the first rejected byte,
/// or 32 when all of them pass.
///
/// Accepted bytes are `%x09`, `%x20-%x7e` and `%x80-%xff`.
///
/// # Safety
///
/// `buf` must hold at least 32 bytes (a full 256-bit load is performed) and
/// the running CPU must support AVX2.
#[inline(always)]
#[allow(non_snake_case)]
#[allow(unused)]
unsafe fn match_header_value_char_32_avx(buf: &[u8]) -> usize {
    #[cfg(target_arch = "x86")]
    use core::arch::x86::*;
    #[cfg(target_arch = "x86_64")]
    use core::arch::x86_64::*;

    debug_assert!(buf.len() >= 32);

    // Unaligned load of the 32 bytes under inspection.
    let chunk = _mm256_lddqu_si256(buf.as_ptr() as *const __m256i);

    // %x09 %x20-%x7e %x80-%xff
    let FLOOR: __m256i = _mm256_set1_epi8(0x20);
    let HTAB: __m256i = _mm256_set1_epi8(0x09);
    let DELETE: __m256i = _mm256_set1_epi8(0x7f);

    // Unsigned `chunk >= FLOOR`: a lane qualifies when max(lane, FLOOR) is the lane itself.
    let at_least_floor = _mm256_cmpeq_epi8(chunk, _mm256_max_epu8(FLOOR, chunk));
    let is_htab = _mm256_cmpeq_epi8(chunk, HTAB);
    let is_delete = _mm256_cmpeq_epi8(chunk, DELETE);

    // Tab is the only byte below FLOOR that is let through.
    let candidate = _mm256_or_si256(is_htab, at_least_floor);
    // andnot computes `!is_delete & candidate`.
    let accepted = _mm256_andnot_si256(is_delete, candidate);

    // One bit per lane; bit 0 corresponds to the first byte of the chunk.
    let mask = _mm256_movemask_epi8(accepted) as u32;

    // TODO: use .trailing_ones() once MSRV >= 1.46
    let rejected = !mask;
    rejected.trailing_zeros() as usize
}
89
/// Checks, for every byte value, that the AVX2 URI matcher agrees with the
/// `URI_MAP` lookup table. Does nothing on CPUs without AVX2.
#[test]
fn avx2_code_matches_uri_chars_table() {
    let has_avx2 = is_x86_feature_detected!("avx2");
    if !has_avx2 {
        return;
    }

    #[allow(clippy::undocumented_unsafe_blocks)]
    unsafe {
        // The filler byte used by `byte_is_allowed` must itself be accepted.
        assert!(byte_is_allowed(b'_', match_uri_vectored));

        for (b, &allowed) in crate::URI_MAP.iter().enumerate() {
            let observed = byte_is_allowed(b as u8, match_uri_vectored);
            assert_eq!(
                observed, allowed,
                "byte_is_allowed({:?}) should be {:?}", b, allowed,
            );
        }
    }
}
108
/// Checks, for every byte value, that the AVX2 header-value matcher agrees
/// with the `HEADER_VALUE_MAP` lookup table. Does nothing on CPUs without AVX2.
#[test]
fn avx2_code_matches_header_value_chars_table() {
    let has_avx2 = is_x86_feature_detected!("avx2");
    if !has_avx2 {
        return;
    }

    #[allow(clippy::undocumented_unsafe_blocks)]
    unsafe {
        // The filler byte used by `byte_is_allowed` must itself be accepted.
        assert!(byte_is_allowed(b'_', match_header_value_vectored));

        for (b, &allowed) in crate::HEADER_VALUE_MAP.iter().enumerate() {
            let observed = byte_is_allowed(b as u8, match_header_value_vectored);
            assert_eq!(
                observed, allowed,
                "byte_is_allowed({:?}) should be {:?}", b, allowed,
            );
        }
    }
}
127
/// Test helper: reports whether matcher `f` accepts `byte`.
///
/// `byte` is placed at index 26 of a 32-byte buffer otherwise filled with
/// `b'_'`, and `f` is run over it. Ending at position 32 means the whole
/// buffer was consumed (byte accepted); ending at 26 means the matcher
/// stopped on the probe (byte rejected). Any other position is treated as
/// impossible.
#[cfg(test)]
unsafe fn byte_is_allowed(byte: u8, f: unsafe fn(bytes: &mut Bytes<'_>)) -> bool {
    const PROBE_INDEX: usize = 26;

    let mut haystack = [b'_'; 32];
    haystack[PROBE_INDEX] = byte;

    let mut bytes = Bytes::new(&haystack);
    f(&mut bytes);

    let stopped_at = bytes.pos();
    match stopped_at {
        32 => true,
        26 => false,
        _ => unreachable!(),
    }
}