rocket_http/parse/uri/
parser.rs

1use pear::parsers::*;
2use pear::combinators::*;
3use pear::input::{self, Pear, Extent, Rewind, Input};
4use pear::macros::{parser, switch, parse_error, parse_try};
5
6use crate::uri::{Uri, Origin, Authority, Absolute, Reference, Asterisk};
7use crate::parse::uri::tables::*;
8use crate::parse::uri::RawInput;
9
10type Result<'a, T> = pear::input::Result<T, RawInput<'a>>;
11
12// SAFETY: Every `unsafe` here comes from bytes -> &str conversions. Since all
13// bytes are checked against tables in `tables.rs`, all of which allow only
14// ASCII characters, these are all safe.
15
// TODO: We should cap the source we pass into `raw` to the bytes we've actually
// checked. Otherwise, we could have good bytes followed by unchecked bad bytes
// since `eof()` may not be called. However, note that we only actually expose
// these parsers via `parse!()`, which _does_ call `eof()`, so we're externally
// okay.
20
21#[parser(rewind)]
22pub fn complete<I, P, O>(input: &mut Pear<I>, p: P) -> input::Result<O, I>
23    where I: Input + Rewind, P: FnOnce(&mut Pear<I>) -> input::Result<O, I>
24{
25    (p()?, eof()?).0
26}
27
28/// TODO: Have a way to ask for for preference in ambiguity resolution.
29///   * An ordered [Preference] is probably best.
30///   * Need to filter/uniqueify. See `uri-pref`.
31/// Once we have this, we should probably set the default so that `authority` is
32/// preferred over `absolute`, otherwise something like `foo:3122` is absolute.
33#[parser]
34pub fn uri<'a>(input: &mut RawInput<'a>) -> Result<'a, Uri<'a>> {
35    // To resolve all ambiguities with preference, we might need to look at the
36    // complete string twice: origin/ref, asterisk/ref, authority/absolute.
37    switch! {
38        asterisk@complete(asterisk) => Uri::Asterisk(asterisk),
39        origin@complete(origin) => Uri::Origin(origin),
40        authority@complete(authority) => Uri::Authority(authority),
41        absolute@complete(absolute) => Uri::Absolute(absolute),
42        _ => Uri::Reference(reference()?)
43    }
44}
45
46#[parser]
47pub fn asterisk<'a>(input: &mut RawInput<'a>) -> Result<'a, Asterisk> {
48    eat(b'*')?;
49    Asterisk
50}
51
52#[parser]
53pub fn origin<'a>(input: &mut RawInput<'a>) -> Result<'a, Origin<'a>> {
54    let (_, path, query) = (peek(b'/')?, path()?, query()?);
55    unsafe { Origin::raw(input.start.into(), path, query) }
56}
57
58#[parser]
59pub fn authority<'a>(input: &mut RawInput<'a>) -> Result<'a, Authority<'a>> {
60    let prefix = take_while(is_reg_name_char)?;
61    let (user_info, host, port) = switch! {
62        peek(b'[') if prefix.is_empty() => (None, host()?, port()?),
63        eat(b':') => {
64            let suffix = take_while(is_reg_name_char)?;
65            switch! {
66                peek(b':') => {
67                    let end = (take_while(is_user_info_char)?, eat(b'@')?).0;
68                    (input.span(prefix, end), host()?, port()?)
69                },
70                eat(b'@') => (input.span(prefix, suffix), host()?, port()?),
71                // FIXME: Rewind to just after prefix to get the right context
72                // to be able to call `port()`. Then remove `maybe_port()`.
73                _ => (None, prefix, maybe_port(&suffix)?)
74            }
75        },
76        eat(b'@') => (Some(prefix), host()?, port()?),
77        _ => (None, prefix, None),
78    };
79
80    unsafe { Authority::raw(input.start.into(), user_info, host, port) }
81}
82
83#[parser]
84pub fn scheme<'a>(input: &mut RawInput<'a>) -> Result<'a, Extent<&'a [u8]>> {
85    let scheme = take_some_while(is_scheme_char)?;
86    if !scheme.get(0).map_or(false, |b| b.is_ascii_alphabetic()) {
87        parse_error!("invalid scheme")?;
88    }
89
90    scheme
91}
92
93#[parser]
94pub fn absolute<'a>(input: &mut RawInput<'a>) -> Result<'a, Absolute<'a>> {
95    let scheme = scheme()?;
96    let (_, (authority, path), query) = (eat(b':')?, hier_part()?, query()?);
97    unsafe { Absolute::raw(input.start.into(), scheme, authority, path, query) }
98}
99
100#[parser]
101pub fn reference<'a>(
102    input: &mut RawInput<'a>,
103) -> Result<'a, Reference<'a>> {
104    let prefix = take_while(is_scheme_char)?;
105    let (scheme, authority, path) = switch! {
106        peek(b':') if prefix.is_empty() => parse_error!("missing scheme")?,
107        eat(b':') => {
108            if !prefix.get(0).map_or(false, |b| b.is_ascii_alphabetic()) {
109                parse_error!("invalid scheme")?;
110            }
111
112            let (authority, path) = hier_part()?;
113            (Some(prefix), authority, path)
114        },
115        peek_slice(b"//") if prefix.is_empty() => {
116            let (authority, path) = hier_part()?;
117            (None, authority, path)
118        },
119        _ => {
120            let path = path()?;
121            let full_path = input.span(prefix, path).unwrap_or(none()?);
122            (None, None, full_path)
123        },
124    };
125
126    let (source, query, fragment) = (input.start.into(), query()?, fragment()?);
127    unsafe { Reference::raw(source, scheme, authority, path, query, fragment) }
128}
129
130#[parser]
131pub fn hier_part<'a>(
132    input: &mut RawInput<'a>
133) -> Result<'a, (Option<Authority<'a>>, Extent<&'a [u8]>)> {
134    switch! {
135        eat_slice(b"//") => {
136            let authority = authority()?;
137            let path = parse_try!(peek(b'/') => path()? => || none()?);
138            (Some(authority), path)
139        },
140        _ => (None, path()?)
141    }
142}
143
144#[parser]
145fn host<'a>(
146    input: &mut RawInput<'a>,
147) -> Result<'a, Extent<&'a [u8]>> {
148    switch! {
149        peek(b'[') => enclosed(b'[', is_host_char, b']')?,
150        _ => take_while(is_reg_name_char)?
151    }
152}
153
154#[parser]
155fn port<'a>(
156    input: &mut RawInput<'a>,
157) -> Result<'a, Option<u16>> {
158    if !succeeds(input, |i| eat(i, b':')) {
159        return Ok(None);
160    }
161
162    let bytes = take_n_while(5, |c| c.is_ascii_digit())?;
163    maybe_port(&bytes)?
164}
165
166// FIXME: The context here is wrong since it's empty. We should reset to
167// current - bytes.len(). Or something like that.
168#[parser]
169fn maybe_port<'a>(input: &mut RawInput<'a>, bytes: &[u8]) -> Result<'a, Option<u16>> {
170    if bytes.is_empty() {
171        return Ok(None);
172    } else if bytes.len() > 5 {
173        parse_error!("port len is out of range")?;
174    } else if !bytes.iter().all(|b| b.is_ascii_digit()) {
175        parse_error!("invalid port bytes")?;
176    }
177
178    let mut port_num: u32 = 0;
179    for (b, i) in bytes.iter().rev().zip(&[1, 10, 100, 1000, 10000]) {
180        port_num += (b - b'0') as u32 * i;
181    }
182
183    if port_num > u16::max_value() as u32 {
184        parse_error!("port out of range: {}", port_num)?;
185    }
186
187    Some(port_num as u16)
188}
189
190#[parser]
191fn path<'a>(input: &mut RawInput<'a>) -> Result<'a, Extent<&'a [u8]>> {
192    take_while(is_pchar)?
193}
194
195#[parser]
196fn query<'a>(input: &mut RawInput<'a>) -> Result<'a, Option<Extent<&'a [u8]>>> {
197    parse_try!(eat(b'?') => take_while(is_qchar)?)
198}
199
200#[parser]
201fn fragment<'a>(input: &mut RawInput<'a>) -> Result<'a, Option<Extent<&'a [u8]>>> {
202    parse_try!(eat(b'#') => take_while(is_qchar)?)
203}