rocket_http/parse/uri/
tables.rs

/// Builds a 256-entry membership table from the union of the byte `sets`.
///
/// For every byte `b` that occurs in any of the sets, entry `b` of the
/// returned array is set to `b` itself; every other entry stays `0`. A
/// non-zero entry therefore marks membership.
///
/// Because a member is stored as its own value, a previously built table can
/// be passed back in as one of the `sets`: its `0` entries merely write `0`
/// to index `0`. For the same reason, the NUL byte can never be marked.
const fn char_table(sets: &[&[u8]]) -> [u8; 256] {
    let mut table = [0u8; 256];

    // `for` loops are unavailable in `const fn`, so peel one element off the
    // front of each slice until nothing is left.
    let mut remaining_sets = sets;
    while let [set, other_sets @ ..] = remaining_sets {
        let mut remaining_bytes: &[u8] = *set;
        while let [byte, other_bytes @ ..] = remaining_bytes {
            table[*byte as usize] = *byte;
            remaining_bytes = other_bytes;
        }

        remaining_sets = other_sets;
    }

    table
}
22
/// The ASCII letters: `A`–`Z` followed by `a`–`z`.
const ALPHA: &[u8] = b"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz";
30
/// The ASCII decimal digits `0`–`9`.
const DIGIT: &[u8] = b"0123456789";
34
/// The bytes that can make up a percent-encoded triplet: `%` plus the
/// hexadecimal digits in both cases.
///
/// This is a plain byte set; it does not by itself check that a `%` is
/// followed by two hex digits.
const PCT_ENCODED: &[u8] = b"%ABCDEFabcdef0123456789";
39
/// The sub-delimiter bytes: `!`, `$`, `&`, `'`, `(`, `)`, `*`, `+`, `,`, `;`
/// and `=`.
const SUB_DELIMS: &[u8] = b"!$&'()*+,;=";
43
44const SCHEME_CHAR: [u8; 256] = char_table(&[
45    ALPHA, DIGIT, &[b'+', b'-', b'.']
46]);
47
48const UNRESERVED: [u8; 256] = char_table(&[
49    ALPHA, DIGIT, &[b'-', b'.', b'_', b'~']
50]);
51
52const REG_NAME_CHARS: [u8; 256] = char_table(&[
53    &UNRESERVED, PCT_ENCODED, SUB_DELIMS
54]);
55
56const USER_INFO_CHARS: [u8; 256] = char_table(&[
57    &REG_NAME_CHARS, &[b':']
58]);
59
60pub const PATH_CHARS: [u8; 256] = char_table(&[
61    &REG_NAME_CHARS, &[b':', b'@', b'/'],
62
63    // NOTE: these are _not_ accepted in RFC 7230/3986. However, browsers
64    // routinely send these unencoded, so allow them to support the real-world.
65    &[b'[',  b']'],
66]);
67
68const QUERY_CHARS: [u8; 256] = char_table(&[
69    &PATH_CHARS, &[b'/', b'?'],
70
71    // NOTE: these are _not_ accepted in RFC 7230/3986. However, browsers
72    // routinely send these unencoded, so allow them to support the real-world.
73    &[b'{', b'}', b'[',  b']', b'\\',  b'^',  b'`', b'|'],
74]);
75
76#[inline(always)]
77pub const fn is_pchar(&c: &u8) -> bool { PATH_CHARS[c as usize] != 0 }
78
79#[inline(always)]
80pub const fn is_host_char(c: &u8) -> bool { is_pchar(c) && *c != b'[' && *c != b']' }
81
82#[inline(always)]
83pub const fn is_scheme_char(&c: &u8) -> bool { SCHEME_CHAR[c as usize] != 0 }
84
85#[inline(always)]
86pub const fn is_user_info_char(&c: &u8) -> bool { USER_INFO_CHARS[c as usize] != 0 }
87
88#[inline(always)]
89pub const fn is_qchar(&c: &u8) -> bool { QUERY_CHARS[c as usize] != 0 }
90
91#[inline(always)]
92pub const fn is_reg_name_char(&c: &u8) -> bool { REG_NAME_CHARS[c as usize] != 0 }
93
#[cfg(test)]
mod tests {
    /// Asserts that every non-zero entry of `table` stores its own index,
    /// which is the invariant that lets tables be fed back into
    /// `char_table` as input sets.
    fn test_char_table(table: &[u8]) {
        table
            .iter()
            .enumerate()
            .filter(|&(_, &value)| value != 0)
            .for_each(|(index, &value)| assert_eq!(index, value as usize));
    }

    // NOTE(review): only these three tables are checked; the remaining
    // tables in this file are not covered here.
    #[test]
    fn check_tables() {
        let tables: [&[u8]; 3] = [
            &super::PATH_CHARS[..],
            &super::QUERY_CHARS[..],
            &super::REG_NAME_CHARS[..],
        ];

        for table in tables.iter() {
            test_char_table(table);
        }
    }
}