httparse/simd/avx2.rs
1use crate::iter::Bytes;
2
3#[inline]
4#[target_feature(enable = "avx2")]
5pub unsafe fn match_uri_vectored(bytes: &mut Bytes) {
6 while bytes.as_ref().len() >= 32 {
7
8 let advance = match_url_char_32_avx(bytes.as_ref());
9
10 bytes.advance(advance);
11
12 if advance != 32 {
13 return;
14 }
15 }
16 super::swar::match_uri_vectored(bytes)
18}
19
/// Returns the length of the leading run of valid URI bytes within the
/// first 32 bytes of `buf` (32 when the whole chunk is valid).
///
/// A byte is accepted when it is `>= 0x21` (unsigned) and is not DEL
/// (`0x7f`) — i.e. printable ASCII minus space, plus the `0x80..=0xff`
/// range, matching the scalar `URI_MAP` table.
///
/// Caller contract: `buf` is at least 32 bytes long and AVX2 is
/// available on the running CPU.
#[inline(always)]
#[allow(non_snake_case, overflowing_literals)]
#[allow(unused)]
unsafe fn match_url_char_32_avx(buf: &[u8]) -> usize {
    debug_assert!(buf.len() >= 32);

    #[cfg(target_arch = "x86")]
    use core::arch::x86::*;
    #[cfg(target_arch = "x86_64")]
    use core::arch::x86_64::*;

    // DEL (0x7f) is the only byte >= 0x21 that must be rejected.
    let DEL: __m256i = _mm256_set1_epi8(0x7f);
    // Everything strictly below 0x21 (controls and space) is rejected.
    let LOW: __m256i = _mm256_set1_epi8(0x21);

    let chunk = _mm256_lddqu_si256(buf.as_ptr() as *const _);
    // Unsigned "lane >= LOW": a lane equals max(lane, LOW) iff lane >= LOW.
    let ge_low = _mm256_cmpeq_epi8(_mm256_max_epu8(chunk, LOW), chunk);
    let is_del = _mm256_cmpeq_epi8(chunk, DEL);
    // Valid lanes: >= 0x21 and not DEL.
    let valid = _mm256_andnot_si256(is_del, ge_low);
    // One mask bit per lane; count the contiguous valid prefix.
    let mask = _mm256_movemask_epi8(valid) as u32;
    mask.trailing_ones() as usize
}
46
47#[target_feature(enable = "avx2")]
48pub unsafe fn match_header_value_vectored(bytes: &mut Bytes) {
49 while bytes.as_ref().len() >= 32 {
50 let advance = match_header_value_char_32_avx(bytes.as_ref());
51 bytes.advance(advance);
52
53 if advance != 32 {
54 return;
55 }
56 }
57 super::swar::match_header_value_vectored(bytes)
59}
60
/// Returns the length of the leading run of valid header-value bytes
/// within the first 32 bytes of `buf` (32 when the whole chunk is
/// valid).
///
/// A byte is accepted when it is `>= 0x20` (unsigned, so the
/// `0x80..=0xff` obs-text range is included) or is HTAB (`0x09`), and
/// is not DEL (`0x7f`) — matching the scalar `HEADER_VALUE_MAP` table.
///
/// Caller contract: `buf` is at least 32 bytes long and AVX2 is
/// available on the running CPU.
#[inline(always)]
#[allow(non_snake_case)]
#[allow(unused)]
unsafe fn match_header_value_char_32_avx(buf: &[u8]) -> usize {
    debug_assert!(buf.len() >= 32);

    #[cfg(target_arch = "x86")]
    use core::arch::x86::*;
    #[cfg(target_arch = "x86_64")]
    use core::arch::x86_64::*;

    // HTAB is the single control character allowed in header values.
    let TAB: __m256i = _mm256_set1_epi8(0x09);
    // DEL must be rejected despite being >= 0x20.
    let DEL: __m256i = _mm256_set1_epi8(0x7f);
    // Lower bound of the allowed printable range (space).
    let LOW: __m256i = _mm256_set1_epi8(0x20);

    let chunk = _mm256_lddqu_si256(buf.as_ptr() as *const _);
    // Unsigned "lane >= LOW": a lane equals max(lane, LOW) iff lane >= LOW.
    let ge_low = _mm256_cmpeq_epi8(_mm256_max_epu8(chunk, LOW), chunk);
    let is_tab = _mm256_cmpeq_epi8(chunk, TAB);
    let is_del = _mm256_cmpeq_epi8(chunk, DEL);
    // Valid lanes: (>= 0x20 or TAB) and not DEL.
    let valid = _mm256_andnot_si256(is_del, _mm256_or_si256(ge_low, is_tab));
    let mask = _mm256_movemask_epi8(valid) as u32;
    mask.trailing_ones() as usize
}
89
/// Checks the AVX2 URI matcher against the scalar `URI_MAP` lookup
/// table for every possible byte value (skipped when the CPU lacks
/// AVX2).
#[test]
fn avx2_code_matches_uri_chars_table() {
    if !is_x86_feature_detected!("avx2") {
        return;
    }

    #[allow(clippy::undocumented_unsafe_blocks)]
    unsafe {
        // Sanity check on a known-allowed byte first.
        assert!(byte_is_allowed(b'_', match_uri_vectored));

        for (idx, expected) in crate::URI_MAP.iter().copied().enumerate() {
            assert_eq!(
                byte_is_allowed(idx as u8, match_uri_vectored),
                expected,
                "byte_is_allowed({:?}) should be {:?}", idx, expected,
            );
        }
    }
}
108
/// Checks the AVX2 header-value matcher against the scalar
/// `HEADER_VALUE_MAP` lookup table for every possible byte value
/// (skipped when the CPU lacks AVX2).
#[test]
fn avx2_code_matches_header_value_chars_table() {
    if !is_x86_feature_detected!("avx2") {
        return;
    }

    #[allow(clippy::undocumented_unsafe_blocks)]
    unsafe {
        // Sanity check on a known-allowed byte first.
        assert!(byte_is_allowed(b'_', match_header_value_vectored));

        for (idx, expected) in crate::HEADER_VALUE_MAP.iter().copied().enumerate() {
            assert_eq!(
                byte_is_allowed(idx as u8, match_header_value_vectored),
                expected,
                "byte_is_allowed({:?}) should be {:?}", idx, expected,
            );
        }
    }
}
127
/// Test helper: runs matcher `f` over a 32-byte buffer of `b'_'` with
/// `byte` planted at index 26, and reports whether `byte` was accepted.
///
/// The matcher either consumes the whole buffer (position 32 — the
/// byte is allowed) or stops exactly at the planted byte (position 26
/// — disallowed); any other stop position is a bug in the matcher.
#[cfg(test)]
unsafe fn byte_is_allowed(byte: u8, f: unsafe fn(bytes: &mut Bytes<'_>)) -> bool {
    let mut slice = [b'_'; 32];
    slice[26] = byte;

    let mut bytes = Bytes::new(&slice);
    f(&mut bytes);

    match bytes.pos() {
        32 => true,
        26 => false,
        _ => unreachable!(),
    }
}