stable_pattern/
pattern.rs

1//! The string Pattern API.
2//!
3//! The Pattern API provides a generic mechanism for using different pattern
4//! types when searching through a string.
5//!
6//! For more details, see the traits [`Pattern`], [`Searcher`],
7//! [`ReverseSearcher`], and [`DoubleEndedSearcher`].
8//!
9//! Although this API is unstable, it is exposed via stable APIs on the
10//! [`str`] type.
11//!
12//! # Examples
13//!
14//! [`Pattern`] is [implemented][pattern-impls] in the stable API for
15//! [`&str`][`str`], [`char`], slices of [`char`], and functions and closures
16//! implementing `FnMut(char) -> bool`.
17//!
18//! ```
19//! let s = "Can you find a needle in a haystack?";
20//!
21//! // &str pattern
22//! assert_eq!(s.find("you"), Some(4));
23//! // char pattern
24//! assert_eq!(s.find('n'), Some(2));
25//! // slice of chars pattern
26//! assert_eq!(s.find(&['a', 'e', 'i', 'o', 'u'][..]), Some(1));
27//! // closure pattern
28//! assert_eq!(s.find(|c: char| c.is_ascii_punctuation()), Some(35));
29//! ```
30//!
31//! [pattern-impls]: Pattern#implementors
32
33use core::cmp;
34use core::fmt;
35
36// Pattern
37
38/// A string pattern.
39///
40/// A `Pattern<'a>` expresses that the implementing type
41/// can be used as a string pattern for searching in a [`&'a str`][str].
42///
43/// For example, both `'a'` and `"aa"` are patterns that
44/// would match at index `1` in the string `"baaaab"`.
45///
46/// The trait itself acts as a builder for an associated
47/// [`Searcher`] type, which does the actual work of finding
48/// occurrences of the pattern in a string.
49///
50/// Depending on the type of the pattern, the behaviour of methods like
51/// [`str::find`] and [`str::contains`] can change. The table below describes
52/// some of those behaviours.
53///
54/// | Pattern type             | Match condition                           |
55/// |--------------------------|-------------------------------------------|
56/// | `&str`                   | is substring                              |
57/// | `char`                   | is contained in string                    |
58/// | `&[char]`                | any char in slice is contained in string  |
59/// | `F: FnMut(char) -> bool` | `F` returns `true` for a char in string   |
60/// | `&&str`                  | is substring                              |
61/// | `&String`                | is substring                              |
62///
63/// # Examples
64///
65/// ```
66/// // &str
67/// assert_eq!("abaaa".find("ba"), Some(1));
68/// assert_eq!("abaaa".find("bac"), None);
69///
70/// // char
71/// assert_eq!("abaaa".find('a'), Some(0));
72/// assert_eq!("abaaa".find('b'), Some(1));
73/// assert_eq!("abaaa".find('c'), None);
74///
75/// // &[char]
76/// assert_eq!("ab".find(&['b', 'a'][..]), Some(0));
77/// assert_eq!("abaaa".find(&['a', 'z'][..]), Some(0));
78/// assert_eq!("abaaa".find(&['c', 'd'][..]), None);
79///
80/// // FnMut(char) -> bool
81/// assert_eq!("abcdef_z".find(|ch| ch > 'd' && ch < 'y'), Some(4));
82/// assert_eq!("abcddd_z".find(|ch| ch > 'd' && ch < 'y'), None);
83/// ```
84pub trait Pattern<'a>: Sized {
85    /// Associated searcher for this pattern
86    type Searcher: Searcher<'a>;
87
88    /// Constructs the associated searcher from
89    /// `self` and the `haystack` to search in.
90    fn into_searcher(self, haystack: &'a str) -> Self::Searcher;
91
92    /// Checks whether the pattern matches anywhere in the haystack
93    #[inline]
94    fn is_contained_in(self, haystack: &'a str) -> bool {
95        self.into_searcher(haystack).next_match().is_some()
96    }
97
98    /// Checks whether the pattern matches at the front of the haystack
99    #[inline]
100    fn is_prefix_of(self, haystack: &'a str) -> bool {
101        matches!(self.into_searcher(haystack).next(), SearchStep::Match(0, _))
102    }
103
104    /// Checks whether the pattern matches at the back of the haystack
105    #[inline]
106    fn is_suffix_of(self, haystack: &'a str) -> bool
107    where
108        Self::Searcher: ReverseSearcher<'a>,
109    {
110        matches!(self.into_searcher(haystack).next_back(), SearchStep::Match(_, j) if haystack.len() == j)
111    }
112
113    /// Removes the pattern from the front of haystack, if it matches.
114    #[inline]
115    fn strip_prefix_of(self, haystack: &'a str) -> Option<&'a str> {
116        if let SearchStep::Match(start, len) = self.into_searcher(haystack).next() {
117            debug_assert_eq!(
118                start, 0,
119                "The first search step from Searcher \
120                 must include the first character"
121            );
122            // SAFETY: `Searcher` is known to return valid indices.
123            unsafe { Some(haystack.get_unchecked(len..)) }
124        } else {
125            None
126        }
127    }
128
129    /// Removes the pattern from the back of haystack, if it matches.
130    #[inline]
131    fn strip_suffix_of(self, haystack: &'a str) -> Option<&'a str>
132    where
133        Self::Searcher: ReverseSearcher<'a>,
134    {
135        if let SearchStep::Match(start, end) = self.into_searcher(haystack).next_back() {
136            debug_assert_eq!(
137                end,
138                haystack.len(),
139                "The first search step from ReverseSearcher \
140                 must include the last character"
141            );
142            // SAFETY: `Searcher` is known to return valid indices.
143            unsafe { Some(haystack.get_unchecked(..start)) }
144        } else {
145            None
146        }
147    }
148}
149
150// Searcher
151
152/// Result of calling [`Searcher::next()`] or [`ReverseSearcher::next_back()`].
153#[derive(Copy, Clone, Eq, PartialEq, Debug)]
154pub enum SearchStep {
155    /// Expresses that a match of the pattern has been found at
156    /// `haystack[a..b]`.
157    Match(usize, usize),
158    /// Expresses that `haystack[a..b]` has been rejected as a possible match
159    /// of the pattern.
160    ///
161    /// Note that there might be more than one `Reject` between two `Match`es,
162    /// there is no requirement for them to be combined into one.
163    Reject(usize, usize),
164    /// Expresses that every byte of the haystack has been visited, ending
165    /// the iteration.
166    Done,
167}
168
169/// A searcher for a string pattern.
170///
171/// This trait provides methods for searching for non-overlapping
172/// matches of a pattern starting from the front (left) of a string.
173///
174/// It will be implemented by associated `Searcher`
175/// types of the [`Pattern`] trait.
176///
177/// The trait is marked unsafe because the indices returned by the
178/// [`next()`][Searcher::next] methods are required to lie on valid utf8
179/// boundaries in the haystack. This enables consumers of this trait to
180/// slice the haystack without additional runtime checks.
181pub unsafe trait Searcher<'a> {
182    /// Getter for the underlying string to be searched in
183    ///
184    /// Will always return the same [`&str`][str].
185    fn haystack(&self) -> &'a str;
186
187    /// Performs the next search step starting from the front.
188    ///
189    /// - Returns [`Match(a, b)`][SearchStep::Match] if `haystack[a..b]` matches
190    ///   the pattern.
191    /// - Returns [`Reject(a, b)`][SearchStep::Reject] if `haystack[a..b]` can
192    ///   not match the pattern, even partially.
193    /// - Returns [`Done`][SearchStep::Done] if every byte of the haystack has
194    ///   been visited.
195    ///
196    /// The stream of [`Match`][SearchStep::Match] and
197    /// [`Reject`][SearchStep::Reject] values up to a [`Done`][SearchStep::Done]
198    /// will contain index ranges that are adjacent, non-overlapping,
199    /// covering the whole haystack, and laying on utf8 boundaries.
200    ///
201    /// A [`Match`][SearchStep::Match] result needs to contain the whole matched
202    /// pattern, however [`Reject`][SearchStep::Reject] results may be split up
203    /// into arbitrary many adjacent fragments. Both ranges may have zero length.
204    ///
205    /// As an example, the pattern `"aaa"` and the haystack `"cbaaaaab"`
206    /// might produce the stream
207    /// `[Reject(0, 1), Reject(1, 2), Match(2, 5), Reject(5, 8)]`
208    fn next(&mut self) -> SearchStep;
209
210    /// Finds the next [`Match`][SearchStep::Match] result. See [`next()`][Searcher::next].
211    ///
212    /// Unlike [`next()`][Searcher::next], there is no guarantee that the returned ranges
213    /// of this and [`next_reject`][Searcher::next_reject] will overlap. This will return
214    /// `(start_match, end_match)`, where start_match is the index of where
215    /// the match begins, and end_match is the index after the end of the match.
216    #[inline]
217    fn next_match(&mut self) -> Option<(usize, usize)> {
218        loop {
219            match self.next() {
220                SearchStep::Match(a, b) => return Some((a, b)),
221                SearchStep::Done => return None,
222                _ => continue,
223            }
224        }
225    }
226
227    /// Finds the next [`Reject`][SearchStep::Reject] result. See [`next()`][Searcher::next]
228    /// and [`next_match()`][Searcher::next_match].
229    ///
230    /// Unlike [`next()`][Searcher::next], there is no guarantee that the returned ranges
231    /// of this and [`next_match`][Searcher::next_match] will overlap.
232    #[inline]
233    fn next_reject(&mut self) -> Option<(usize, usize)> {
234        loop {
235            match self.next() {
236                SearchStep::Reject(a, b) => return Some((a, b)),
237                SearchStep::Done => return None,
238                _ => continue,
239            }
240        }
241    }
242}
243
244/// A reverse searcher for a string pattern.
245///
246/// This trait provides methods for searching for non-overlapping
247/// matches of a pattern starting from the back (right) of a string.
248///
249/// It will be implemented by associated [`Searcher`]
250/// types of the [`Pattern`] trait if the pattern supports searching
251/// for it from the back.
252///
253/// The index ranges returned by this trait are not required
254/// to exactly match those of the forward search in reverse.
255///
256/// For the reason why this trait is marked unsafe, see them
257/// parent trait [`Searcher`].
258pub unsafe trait ReverseSearcher<'a>: Searcher<'a> {
259    /// Performs the next search step starting from the back.
260    ///
261    /// - Returns [`Match(a, b)`][SearchStep::Match] if `haystack[a..b]`
262    ///   matches the pattern.
263    /// - Returns [`Reject(a, b)`][SearchStep::Reject] if `haystack[a..b]`
264    ///   can not match the pattern, even partially.
265    /// - Returns [`Done`][SearchStep::Done] if every byte of the haystack
266    ///   has been visited
267    ///
268    /// The stream of [`Match`][SearchStep::Match] and
269    /// [`Reject`][SearchStep::Reject] values up to a [`Done`][SearchStep::Done]
270    /// will contain index ranges that are adjacent, non-overlapping,
271    /// covering the whole haystack, and laying on utf8 boundaries.
272    ///
273    /// A [`Match`][SearchStep::Match] result needs to contain the whole matched
274    /// pattern, however [`Reject`][SearchStep::Reject] results may be split up
275    /// into arbitrary many adjacent fragments. Both ranges may have zero length.
276    ///
277    /// As an example, the pattern `"aaa"` and the haystack `"cbaaaaab"`
278    /// might produce the stream
279    /// `[Reject(7, 8), Match(4, 7), Reject(1, 4), Reject(0, 1)]`.
280    fn next_back(&mut self) -> SearchStep;
281
282    /// Finds the next [`Match`][SearchStep::Match] result.
283    /// See [`next_back()`][ReverseSearcher::next_back].
284    #[inline]
285    fn next_match_back(&mut self) -> Option<(usize, usize)> {
286        loop {
287            match self.next_back() {
288                SearchStep::Match(a, b) => return Some((a, b)),
289                SearchStep::Done => return None,
290                _ => continue,
291            }
292        }
293    }
294
295    /// Finds the next [`Reject`][SearchStep::Reject] result.
296    /// See [`next_back()`][ReverseSearcher::next_back].
297    #[inline]
298    fn next_reject_back(&mut self) -> Option<(usize, usize)> {
299        loop {
300            match self.next_back() {
301                SearchStep::Reject(a, b) => return Some((a, b)),
302                SearchStep::Done => return None,
303                _ => continue,
304            }
305        }
306    }
307}
308
309/// A marker trait to express that a [`ReverseSearcher`]
310/// can be used for a [`DoubleEndedIterator`] implementation.
311///
312/// For this, the impl of [`Searcher`] and [`ReverseSearcher`] need
313/// to follow these conditions:
314///
315/// - All results of `next()` need to be identical
316///   to the results of `next_back()` in reverse order.
317/// - `next()` and `next_back()` need to behave as
318///   the two ends of a range of values, that is they
319///   can not "walk past each other".
320///
321/// # Examples
322///
323/// `char::Searcher` is a `DoubleEndedSearcher` because searching for a
324/// [`char`] only requires looking at one at a time, which behaves the same
325/// from both ends.
326///
327/// `(&str)::Searcher` is not a `DoubleEndedSearcher` because
328/// the pattern `"aa"` in the haystack `"aaa"` matches as either
329/// `"[aa]a"` or `"a[aa]"`, depending from which side it is searched.
330pub trait DoubleEndedSearcher<'a>: ReverseSearcher<'a> {}
331
332/////////////////////////////////////////////////////////////////////////////
333// Impl for char
334/////////////////////////////////////////////////////////////////////////////
335
336/// Associated type for `<char as Pattern<'a>>::Searcher`.
337#[derive(Clone, Debug)]
338pub struct CharSearcher<'a> {
339    haystack: &'a str,
340    // safety invariant: `finger`/`finger_back` must be a valid utf8 byte index of `haystack`
341    // This invariant can be broken *within* next_match and next_match_back, however
342    // they must exit with fingers on valid code point boundaries.
343    /// `finger` is the current byte index of the forward search.
344    /// Imagine that it exists before the byte at its index, i.e.
345    /// `haystack[finger]` is the first byte of the slice we must inspect during
346    /// forward searching
347    finger: usize,
348    /// `finger_back` is the current byte index of the reverse search.
349    /// Imagine that it exists after the byte at its index, i.e.
350    /// haystack[finger_back - 1] is the last byte of the slice we must inspect during
351    /// forward searching (and thus the first byte to be inspected when calling next_back()).
352    finger_back: usize,
353    /// The character being searched for
354    needle: char,
355
356    // safety invariant: `utf8_size` must be less than 5
357    /// The number of bytes `needle` takes up when encoded in utf8.
358    utf8_size: usize,
359    /// A utf8 encoded copy of the `needle`
360    utf8_encoded: [u8; 4],
361}
362
363unsafe impl<'a> Searcher<'a> for CharSearcher<'a> {
364    #[inline]
365    fn haystack(&self) -> &'a str {
366        self.haystack
367    }
368    #[inline]
369    fn next(&mut self) -> SearchStep {
370        let old_finger = self.finger;
371        // SAFETY: 1-4 guarantee safety of `get_unchecked`
372        // 1. `self.finger` and `self.finger_back` are kept on unicode boundaries
373        //    (this is invariant)
374        // 2. `self.finger >= 0` since it starts at 0 and only increases
375        // 3. `self.finger < self.finger_back` because otherwise the char `iter`
376        //    would return `SearchStep::Done`
377        // 4. `self.finger` comes before the end of the haystack because `self.finger_back`
378        //    starts at the end and only decreases
379        let slice = unsafe { self.haystack.get_unchecked(old_finger..self.finger_back) };
380        let mut iter = slice.chars();
381        let old_len = iter.as_str().len();
382        if let Some(ch) = iter.next() {
383            // add byte offset of current character
384            // without re-encoding as utf-8
385            self.finger += old_len - iter.as_str().len();
386            if ch == self.needle {
387                SearchStep::Match(old_finger, self.finger)
388            } else {
389                SearchStep::Reject(old_finger, self.finger)
390            }
391        } else {
392            SearchStep::Done
393        }
394    }
395    #[inline]
396    fn next_match(&mut self) -> Option<(usize, usize)> {
397        loop {
398            // get the haystack after the last character found
399            let bytes = self.haystack.as_bytes().get(self.finger..self.finger_back)?;
400            // the last byte of the utf8 encoded needle
401            // SAFETY: we have an invariant that `utf8_size < 5`
402            let last_byte = unsafe { *self.utf8_encoded.get_unchecked(self.utf8_size - 1) };
403            if let Some(index) = memchr::memchr(last_byte, bytes) {
404                // The new finger is the index of the byte we found,
405                // plus one, since we memchr'd for the last byte of the character.
406                //
407                // Note that this doesn't always give us a finger on a UTF8 boundary.
408                // If we *didn't* find our character
409                // we may have indexed to the non-last byte of a 3-byte or 4-byte character.
410                // We can't just skip to the next valid starting byte because a character like
411                // ꁁ (U+A041 YI SYLLABLE PA), utf-8 `EA 81 81` will have us always find
412                // the second byte when searching for the third.
413                //
414                // However, this is totally okay. While we have the invariant that
415                // self.finger is on a UTF8 boundary, this invariant is not relied upon
416                // within this method (it is relied upon in CharSearcher::next()).
417                //
418                // We only exit this method when we reach the end of the string, or if we
419                // find something. When we find something the `finger` will be set
420                // to a UTF8 boundary.
421                self.finger += index + 1;
422                if self.finger >= self.utf8_size {
423                    let found_char = self.finger - self.utf8_size;
424                    if let Some(slice) = self.haystack.as_bytes().get(found_char..self.finger) {
425                        if slice == &self.utf8_encoded[0..self.utf8_size] {
426                            return Some((found_char, self.finger));
427                        }
428                    }
429                }
430            } else {
431                // found nothing, exit
432                self.finger = self.finger_back;
433                return None;
434            }
435        }
436    }
437
438    // let next_reject use the default implementation from the Searcher trait
439}
440
441unsafe impl<'a> ReverseSearcher<'a> for CharSearcher<'a> {
442    #[inline]
443    fn next_back(&mut self) -> SearchStep {
444        let old_finger = self.finger_back;
445        // SAFETY: see the comment for next() above
446        let slice = unsafe { self.haystack.get_unchecked(self.finger..old_finger) };
447        let mut iter = slice.chars();
448        let old_len = iter.as_str().len();
449        if let Some(ch) = iter.next_back() {
450            // subtract byte offset of current character
451            // without re-encoding as utf-8
452            self.finger_back -= old_len - iter.as_str().len();
453            if ch == self.needle {
454                SearchStep::Match(self.finger_back, old_finger)
455            } else {
456                SearchStep::Reject(self.finger_back, old_finger)
457            }
458        } else {
459            SearchStep::Done
460        }
461    }
462    #[inline]
463    fn next_match_back(&mut self) -> Option<(usize, usize)> {
464        let haystack = self.haystack.as_bytes();
465        loop {
466            // get the haystack up to but not including the last character searched
467            let bytes = haystack.get(self.finger..self.finger_back)?;
468            // the last byte of the utf8 encoded needle
469            // SAFETY: we have an invariant that `utf8_size < 5`
470            let last_byte = unsafe { *self.utf8_encoded.get_unchecked(self.utf8_size - 1) };
471            if let Some(index) = memchr::memrchr(last_byte, bytes) {
472                // we searched a slice that was offset by self.finger,
473                // add self.finger to recoup the original index
474                let index = self.finger + index;
475                // memrchr will return the index of the byte we wish to
476                // find. In case of an ASCII character, this is indeed
477                // were we wish our new finger to be ("after" the found
478                // char in the paradigm of reverse iteration). For
479                // multibyte chars we need to skip down by the number of more
480                // bytes they have than ASCII
481                let shift = self.utf8_size - 1;
482                if index >= shift {
483                    let found_char = index - shift;
484                    if let Some(slice) = haystack.get(found_char..(found_char + self.utf8_size)) {
485                        if slice == &self.utf8_encoded[0..self.utf8_size] {
486                            // move finger to before the character found (i.e., at its start index)
487                            self.finger_back = found_char;
488                            return Some((self.finger_back, self.finger_back + self.utf8_size));
489                        }
490                    }
491                }
492                // We can't use finger_back = index - size + 1 here. If we found the last char
493                // of a different-sized character (or the middle byte of a different character)
494                // we need to bump the finger_back down to `index`. This similarly makes
495                // `finger_back` have the potential to no longer be on a boundary,
496                // but this is OK since we only exit this function on a boundary
497                // or when the haystack has been searched completely.
498                //
499                // Unlike next_match this does not
500                // have the problem of repeated bytes in utf-8 because
501                // we're searching for the last byte, and we can only have
502                // found the last byte when searching in reverse.
503                self.finger_back = index;
504            } else {
505                self.finger_back = self.finger;
506                // found nothing, exit
507                return None;
508            }
509        }
510    }
511
512    // let next_reject_back use the default implementation from the Searcher trait
513}
514
515impl<'a> DoubleEndedSearcher<'a> for CharSearcher<'a> {}
516
517/// Searches for chars that are equal to a given [`char`].
518///
519/// # Examples
520///
521/// ```
522/// assert_eq!("Hello world".find('o'), Some(4));
523/// ```
524impl<'a> Pattern<'a> for char {
525    type Searcher = CharSearcher<'a>;
526
527    #[inline]
528    fn into_searcher(self, haystack: &'a str) -> Self::Searcher {
529        let mut utf8_encoded = [0; 4];
530        let utf8_size = self.encode_utf8(&mut utf8_encoded).len();
531        CharSearcher {
532            haystack,
533            finger: 0,
534            finger_back: haystack.len(),
535            needle: self,
536            utf8_size,
537            utf8_encoded,
538        }
539    }
540
541    #[inline]
542    fn is_contained_in(self, haystack: &'a str) -> bool {
543        if (self as u32) < 128 {
544            haystack.as_bytes().contains(&(self as u8))
545        } else {
546            let mut buffer = [0u8; 4];
547            self.encode_utf8(&mut buffer).is_contained_in(haystack)
548        }
549    }
550
551    #[inline]
552    fn is_prefix_of(self, haystack: &'a str) -> bool {
553        self.encode_utf8(&mut [0u8; 4]).is_prefix_of(haystack)
554    }
555
556    #[inline]
557    fn strip_prefix_of(self, haystack: &'a str) -> Option<&'a str> {
558        self.encode_utf8(&mut [0u8; 4]).strip_prefix_of(haystack)
559    }
560
561    #[inline]
562    fn is_suffix_of(self, haystack: &'a str) -> bool
563    where
564        Self::Searcher: ReverseSearcher<'a>,
565    {
566        self.encode_utf8(&mut [0u8; 4]).is_suffix_of(haystack)
567    }
568
569    #[inline]
570    fn strip_suffix_of(self, haystack: &'a str) -> Option<&'a str>
571    where
572        Self::Searcher: ReverseSearcher<'a>,
573    {
574        self.encode_utf8(&mut [0u8; 4]).strip_suffix_of(haystack)
575    }
576}
577
578/////////////////////////////////////////////////////////////////////////////
579// Impl for a MultiCharEq wrapper
580/////////////////////////////////////////////////////////////////////////////
581
582#[doc(hidden)]
583trait MultiCharEq {
584    fn matches(&mut self, c: char) -> bool;
585}
586
587impl<F> MultiCharEq for F
588where
589    F: FnMut(char) -> bool,
590{
591    #[inline]
592    fn matches(&mut self, c: char) -> bool {
593        (*self)(c)
594    }
595}
596
597impl MultiCharEq for &[char] {
598    #[inline]
599    fn matches(&mut self, c: char) -> bool {
600        self.iter().any(|&m| m == c)
601    }
602}
603
604struct MultiCharEqPattern<C: MultiCharEq>(C);
605
606#[derive(Clone, Debug)]
607struct MultiCharEqSearcher<'a, C: MultiCharEq> {
608    char_eq: C,
609    haystack: &'a str,
610    char_indices: core::str::CharIndices<'a>,
611}
612
613impl<'a, C: MultiCharEq> Pattern<'a> for MultiCharEqPattern<C> {
614    type Searcher = MultiCharEqSearcher<'a, C>;
615
616    #[inline]
617    fn into_searcher(self, haystack: &'a str) -> MultiCharEqSearcher<'a, C> {
618        MultiCharEqSearcher { haystack, char_eq: self.0, char_indices: haystack.char_indices() }
619    }
620}
621
622unsafe impl<'a, C: MultiCharEq> Searcher<'a> for MultiCharEqSearcher<'a, C> {
623    #[inline]
624    fn haystack(&self) -> &'a str {
625        self.haystack
626    }
627
628    #[inline]
629    fn next(&mut self) -> SearchStep {
630        let s = &mut self.char_indices;
631        // Compare lengths of the internal byte slice iterator
632        // to find length of current char
633        let pre_len = s.as_str().len();
634        if let Some((i, c)) = s.next() {
635            let len = s.as_str().len();
636            let char_len = pre_len - len;
637            if self.char_eq.matches(c) {
638                return SearchStep::Match(i, i + char_len);
639            } else {
640                return SearchStep::Reject(i, i + char_len);
641            }
642        }
643        SearchStep::Done
644    }
645}
646
647unsafe impl<'a, C: MultiCharEq> ReverseSearcher<'a> for MultiCharEqSearcher<'a, C> {
648    #[inline]
649    fn next_back(&mut self) -> SearchStep {
650        let s = &mut self.char_indices;
651        // Compare lengths of the internal byte slice iterator
652        // to find length of current char
653        let pre_len = s.as_str().len();
654        if let Some((i, c)) = s.next_back() {
655            let len = s.as_str().len();
656            let char_len = pre_len - len;
657            if self.char_eq.matches(c) {
658                return SearchStep::Match(i, i + char_len);
659            } else {
660                return SearchStep::Reject(i, i + char_len);
661            }
662        }
663        SearchStep::Done
664    }
665}
666
667impl<'a, C: MultiCharEq> DoubleEndedSearcher<'a> for MultiCharEqSearcher<'a, C> {}
668
669/////////////////////////////////////////////////////////////////////////////
670
671macro_rules! pattern_methods {
672    ($t:ty, $pmap:expr, $smap:expr) => {
673        type Searcher = $t;
674
675        #[inline]
676        fn into_searcher(self, haystack: &'a str) -> $t {
677            ($smap)(($pmap)(self).into_searcher(haystack))
678        }
679
680        #[inline]
681        fn is_contained_in(self, haystack: &'a str) -> bool {
682            ($pmap)(self).is_contained_in(haystack)
683        }
684
685        #[inline]
686        fn is_prefix_of(self, haystack: &'a str) -> bool {
687            ($pmap)(self).is_prefix_of(haystack)
688        }
689
690        #[inline]
691        fn strip_prefix_of(self, haystack: &'a str) -> Option<&'a str> {
692            ($pmap)(self).strip_prefix_of(haystack)
693        }
694
695        #[inline]
696        fn is_suffix_of(self, haystack: &'a str) -> bool
697        where
698            $t: ReverseSearcher<'a>,
699        {
700            ($pmap)(self).is_suffix_of(haystack)
701        }
702
703        #[inline]
704        fn strip_suffix_of(self, haystack: &'a str) -> Option<&'a str>
705        where
706            $t: ReverseSearcher<'a>,
707        {
708            ($pmap)(self).strip_suffix_of(haystack)
709        }
710    };
711}
712
713macro_rules! searcher_methods {
714    (forward) => {
715        #[inline]
716        fn haystack(&self) -> &'a str {
717            self.0.haystack()
718        }
719        #[inline]
720        fn next(&mut self) -> SearchStep {
721            self.0.next()
722        }
723        #[inline]
724        fn next_match(&mut self) -> Option<(usize, usize)> {
725            self.0.next_match()
726        }
727        #[inline]
728        fn next_reject(&mut self) -> Option<(usize, usize)> {
729            self.0.next_reject()
730        }
731    };
732    (reverse) => {
733        #[inline]
734        fn next_back(&mut self) -> SearchStep {
735            self.0.next_back()
736        }
737        #[inline]
738        fn next_match_back(&mut self) -> Option<(usize, usize)> {
739            self.0.next_match_back()
740        }
741        #[inline]
742        fn next_reject_back(&mut self) -> Option<(usize, usize)> {
743            self.0.next_reject_back()
744        }
745    };
746}
747
748/////////////////////////////////////////////////////////////////////////////
749// Impl for &[char]
750/////////////////////////////////////////////////////////////////////////////
751
752// Todo: Change / Remove due to ambiguity in meaning.
753
754/// Associated type for `<&[char] as Pattern<'a>>::Searcher`.
755#[derive(Clone, Debug)]
756pub struct CharSliceSearcher<'a, 'b>(<MultiCharEqPattern<&'b [char]> as Pattern<'a>>::Searcher);
757
758unsafe impl<'a, 'b> Searcher<'a> for CharSliceSearcher<'a, 'b> {
759    searcher_methods!(forward);
760}
761
762unsafe impl<'a, 'b> ReverseSearcher<'a> for CharSliceSearcher<'a, 'b> {
763    searcher_methods!(reverse);
764}
765
766impl<'a, 'b> DoubleEndedSearcher<'a> for CharSliceSearcher<'a, 'b> {}
767
768/// Searches for chars that are equal to any of the [`char`]s in the slice.
769///
770/// # Examples
771///
772/// ```
773/// assert_eq!("Hello world".find(&['l', 'l'] as &[_]), Some(2));
774/// assert_eq!("Hello world".find(&['l', 'l'][..]), Some(2));
775/// ```
776impl<'a, 'b> Pattern<'a> for &'b [char] {
777    pattern_methods!(CharSliceSearcher<'a, 'b>, MultiCharEqPattern, CharSliceSearcher);
778}
779
780/////////////////////////////////////////////////////////////////////////////
781// Impl for F: FnMut(char) -> bool
782/////////////////////////////////////////////////////////////////////////////
783
784/// Associated type for `<F as Pattern<'a>>::Searcher`.
785#[derive(Clone)]
786pub struct CharPredicateSearcher<'a, F>(<MultiCharEqPattern<F> as Pattern<'a>>::Searcher)
787where
788    F: FnMut(char) -> bool;
789
790impl<F> fmt::Debug for CharPredicateSearcher<'_, F>
791where
792    F: FnMut(char) -> bool,
793{
794    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
795        f.debug_struct("CharPredicateSearcher")
796            .field("haystack", &self.0.haystack)
797            .field("char_indices", &self.0.char_indices)
798            .finish()
799    }
800}
801unsafe impl<'a, F> Searcher<'a> for CharPredicateSearcher<'a, F>
802where
803    F: FnMut(char) -> bool,
804{
805    searcher_methods!(forward);
806}
807
808unsafe impl<'a, F> ReverseSearcher<'a> for CharPredicateSearcher<'a, F>
809where
810    F: FnMut(char) -> bool,
811{
812    searcher_methods!(reverse);
813}
814
815impl<'a, F> DoubleEndedSearcher<'a> for CharPredicateSearcher<'a, F> where F: FnMut(char) -> bool {}
816
817/// Searches for [`char`]s that match the given predicate.
818///
819/// # Examples
820///
821/// ```
822/// assert_eq!("Hello world".find(char::is_uppercase), Some(0));
823/// assert_eq!("Hello world".find(|c| "aeiou".contains(c)), Some(1));
824/// ```
825impl<'a, F> Pattern<'a> for F
826where
827    F: FnMut(char) -> bool,
828{
829    pattern_methods!(CharPredicateSearcher<'a, F>, MultiCharEqPattern, CharPredicateSearcher);
830}
831
832/////////////////////////////////////////////////////////////////////////////
833// Impl for &&str
834/////////////////////////////////////////////////////////////////////////////
835
836/// Delegates to the `&str` impl.
837impl<'a, 'b, 'c> Pattern<'a> for &'c &'b str {
838    pattern_methods!(StrSearcher<'a, 'b>, |&s| s, |s| s);
839}
840
841/////////////////////////////////////////////////////////////////////////////
842// Impl for &str
843/////////////////////////////////////////////////////////////////////////////
844
845/// Non-allocating substring search.
846///
847/// Will handle the pattern `""` as returning empty matches at each character
848/// boundary.
849///
850/// # Examples
851///
852/// ```
853/// assert_eq!("Hello world".find("world"), Some(6));
854/// ```
855impl<'a, 'b> Pattern<'a> for &'b str {
856    type Searcher = StrSearcher<'a, 'b>;
857
858    #[inline]
859    fn into_searcher(self, haystack: &'a str) -> StrSearcher<'a, 'b> {
860        StrSearcher::new(haystack, self)
861    }
862
863    /// Checks whether the pattern matches at the front of the haystack.
864    #[inline]
865    fn is_prefix_of(self, haystack: &'a str) -> bool {
866        haystack.as_bytes().starts_with(self.as_bytes())
867    }
868
869    /// Removes the pattern from the front of haystack, if it matches.
870    #[inline]
871    fn strip_prefix_of(self, haystack: &'a str) -> Option<&'a str> {
872        if self.is_prefix_of(haystack) {
873            // SAFETY: prefix was just verified to exist.
874            unsafe { Some(haystack.get_unchecked(self.as_bytes().len()..)) }
875        } else {
876            None
877        }
878    }
879
880    /// Checks whether the pattern matches at the back of the haystack.
881    #[inline]
882    fn is_suffix_of(self, haystack: &'a str) -> bool {
883        haystack.as_bytes().ends_with(self.as_bytes())
884    }
885
886    /// Removes the pattern from the back of haystack, if it matches.
887    #[inline]
888    fn strip_suffix_of(self, haystack: &'a str) -> Option<&'a str> {
889        if self.is_suffix_of(haystack) {
890            let i = haystack.len() - self.as_bytes().len();
891            // SAFETY: suffix was just verified to exist.
892            unsafe { Some(haystack.get_unchecked(..i)) }
893        } else {
894            None
895        }
896    }
897}
898
899/////////////////////////////////////////////////////////////////////////////
900// Two Way substring searcher
901/////////////////////////////////////////////////////////////////////////////
902
903#[derive(Clone, Debug)]
904/// Associated type for `<&str as Pattern<'a>>::Searcher`.
905pub struct StrSearcher<'a, 'b> {
906    haystack: &'a str,
907    needle: &'b str,
908
909    searcher: StrSearcherImpl,
910}
911
912#[derive(Clone, Debug)]
913enum StrSearcherImpl {
914    Empty(EmptyNeedle),
915    TwoWay(TwoWaySearcher),
916}
917
918#[derive(Clone, Debug)]
919struct EmptyNeedle {
920    position: usize,
921    end: usize,
922    is_match_fw: bool,
923    is_match_bw: bool,
924}
925
926impl<'a, 'b> StrSearcher<'a, 'b> {
927    fn new(haystack: &'a str, needle: &'b str) -> StrSearcher<'a, 'b> {
928        if needle.is_empty() {
929            StrSearcher {
930                haystack,
931                needle,
932                searcher: StrSearcherImpl::Empty(EmptyNeedle {
933                    position: 0,
934                    end: haystack.len(),
935                    is_match_fw: true,
936                    is_match_bw: true,
937                }),
938            }
939        } else {
940            StrSearcher {
941                haystack,
942                needle,
943                searcher: StrSearcherImpl::TwoWay(TwoWaySearcher::new(
944                    needle.as_bytes(),
945                    haystack.len(),
946                )),
947            }
948        }
949    }
950}
951
952unsafe impl<'a, 'b> Searcher<'a> for StrSearcher<'a, 'b> {
953    #[inline]
954    fn haystack(&self) -> &'a str {
955        self.haystack
956    }
957
958    #[inline]
959    fn next(&mut self) -> SearchStep {
960        match self.searcher {
961            StrSearcherImpl::Empty(ref mut searcher) => {
962                // empty needle rejects every char and matches every empty string between them
963                let is_match = searcher.is_match_fw;
964                searcher.is_match_fw = !searcher.is_match_fw;
965                let pos = searcher.position;
966                match self.haystack[pos..].chars().next() {
967                    _ if is_match => SearchStep::Match(pos, pos),
968                    None => SearchStep::Done,
969                    Some(ch) => {
970                        searcher.position += ch.len_utf8();
971                        SearchStep::Reject(pos, searcher.position)
972                    }
973                }
974            }
975            StrSearcherImpl::TwoWay(ref mut searcher) => {
976                // TwoWaySearcher produces valid *Match* indices that split at char boundaries
977                // as long as it does correct matching and that haystack and needle are
978                // valid UTF-8
979                // *Rejects* from the algorithm can fall on any indices, but we will walk them
980                // manually to the next character boundary, so that they are utf-8 safe.
981                if searcher.position == self.haystack.len() {
982                    return SearchStep::Done;
983                }
984                let is_long = searcher.memory == usize::MAX;
985                match searcher.next::<RejectAndMatch>(
986                    self.haystack.as_bytes(),
987                    self.needle.as_bytes(),
988                    is_long,
989                ) {
990                    SearchStep::Reject(a, mut b) => {
991                        // skip to next char boundary
992                        while !self.haystack.is_char_boundary(b) {
993                            b += 1;
994                        }
995                        searcher.position = cmp::max(b, searcher.position);
996                        SearchStep::Reject(a, b)
997                    }
998                    otherwise => otherwise,
999                }
1000            }
1001        }
1002    }
1003
1004    #[inline]
1005    fn next_match(&mut self) -> Option<(usize, usize)> {
1006        match self.searcher {
1007            StrSearcherImpl::Empty(..) => loop {
1008                match self.next() {
1009                    SearchStep::Match(a, b) => return Some((a, b)),
1010                    SearchStep::Done => return None,
1011                    SearchStep::Reject(..) => {}
1012                }
1013            },
1014            StrSearcherImpl::TwoWay(ref mut searcher) => {
1015                let is_long = searcher.memory == usize::MAX;
1016                // write out `true` and `false` cases to encourage the compiler
1017                // to specialize the two cases separately.
1018                if is_long {
1019                    searcher.next::<MatchOnly>(
1020                        self.haystack.as_bytes(),
1021                        self.needle.as_bytes(),
1022                        true,
1023                    )
1024                } else {
1025                    searcher.next::<MatchOnly>(
1026                        self.haystack.as_bytes(),
1027                        self.needle.as_bytes(),
1028                        false,
1029                    )
1030                }
1031            }
1032        }
1033    }
1034}
1035
1036unsafe impl<'a, 'b> ReverseSearcher<'a> for StrSearcher<'a, 'b> {
1037    #[inline]
1038    fn next_back(&mut self) -> SearchStep {
1039        match self.searcher {
1040            StrSearcherImpl::Empty(ref mut searcher) => {
1041                let is_match = searcher.is_match_bw;
1042                searcher.is_match_bw = !searcher.is_match_bw;
1043                let end = searcher.end;
1044                match self.haystack[..end].chars().next_back() {
1045                    _ if is_match => SearchStep::Match(end, end),
1046                    None => SearchStep::Done,
1047                    Some(ch) => {
1048                        searcher.end -= ch.len_utf8();
1049                        SearchStep::Reject(searcher.end, end)
1050                    }
1051                }
1052            }
1053            StrSearcherImpl::TwoWay(ref mut searcher) => {
1054                if searcher.end == 0 {
1055                    return SearchStep::Done;
1056                }
1057                let is_long = searcher.memory == usize::MAX;
1058                match searcher.next_back::<RejectAndMatch>(
1059                    self.haystack.as_bytes(),
1060                    self.needle.as_bytes(),
1061                    is_long,
1062                ) {
1063                    SearchStep::Reject(mut a, b) => {
1064                        // skip to next char boundary
1065                        while !self.haystack.is_char_boundary(a) {
1066                            a -= 1;
1067                        }
1068                        searcher.end = cmp::min(a, searcher.end);
1069                        SearchStep::Reject(a, b)
1070                    }
1071                    otherwise => otherwise,
1072                }
1073            }
1074        }
1075    }
1076
1077    #[inline]
1078    fn next_match_back(&mut self) -> Option<(usize, usize)> {
1079        match self.searcher {
1080            StrSearcherImpl::Empty(..) => loop {
1081                match self.next_back() {
1082                    SearchStep::Match(a, b) => return Some((a, b)),
1083                    SearchStep::Done => return None,
1084                    SearchStep::Reject(..) => {}
1085                }
1086            },
1087            StrSearcherImpl::TwoWay(ref mut searcher) => {
1088                let is_long = searcher.memory == usize::MAX;
1089                // write out `true` and `false`, like `next_match`
1090                if is_long {
1091                    searcher.next_back::<MatchOnly>(
1092                        self.haystack.as_bytes(),
1093                        self.needle.as_bytes(),
1094                        true,
1095                    )
1096                } else {
1097                    searcher.next_back::<MatchOnly>(
1098                        self.haystack.as_bytes(),
1099                        self.needle.as_bytes(),
1100                        false,
1101                    )
1102                }
1103            }
1104        }
1105    }
1106}
1107
1108/// The internal state of the two-way substring search algorithm.
1109#[derive(Clone, Debug)]
1110struct TwoWaySearcher {
1111    // constants
1112    /// critical factorization index
1113    crit_pos: usize,
1114    /// critical factorization index for reversed needle
1115    crit_pos_back: usize,
1116    period: usize,
1117    /// `byteset` is an extension (not part of the two way algorithm);
1118    /// it's a 64-bit "fingerprint" where each set bit `j` corresponds
1119    /// to a (byte & 63) == j present in the needle.
1120    byteset: u64,
1121
1122    // variables
1123    position: usize,
1124    end: usize,
1125    /// index into needle before which we have already matched
1126    memory: usize,
1127    /// index into needle after which we have already matched
1128    memory_back: usize,
1129}
1130
1131/*
1132    This is the Two-Way search algorithm, which was introduced in the paper:
1133    Crochemore, M., Perrin, D., 1991, Two-way string-matching, Journal of the ACM 38(3):651-675.
1134
1135    Here's some background information.
1136
1137    A *word* is a string of symbols. The *length* of a word should be a familiar
1138    notion, and here we denote it for any word x by |x|.
1139    (We also allow for the possibility of the *empty word*, a word of length zero).
1140
1141    If x is any non-empty word, then an integer p with 0 < p <= |x| is said to be a
1142    *period* for x iff for all i with 0 <= i <= |x| - p - 1, we have x[i] == x[i+p].
1143    For example, both 1 and 2 are periods for the string "aa". As another example,
1144    the only period of the string "abcd" is 4.
1145
1146    We denote by period(x) the *smallest* period of x (provided that x is non-empty).
1147    This is always well-defined since every non-empty word x has at least one period,
1148    |x|. We sometimes call this *the period* of x.
1149
1150    If u, v and x are words such that x = uv, where uv is the concatenation of u and
1151    v, then we say that (u, v) is a *factorization* of x.
1152
1153    Let (u, v) be a factorization for a word x. Then if w is a non-empty word such
1154    that both of the following hold
1155
1156      - either w is a suffix of u or u is a suffix of w
1157      - either w is a prefix of v or v is a prefix of w
1158
1159    then w is said to be a *repetition* for the factorization (u, v).
1160
1161    Just to unpack this, there are four possibilities here. Let w = "abc". Then we
1162    might have:
1163
1164      - w is a suffix of u and w is a prefix of v. ex: ("lolabc", "abcde")
1165      - w is a suffix of u and v is a prefix of w. ex: ("lolabc", "ab")
1166      - u is a suffix of w and w is a prefix of v. ex: ("bc", "abchi")
1167      - u is a suffix of w and v is a prefix of w. ex: ("bc", "a")
1168
1169    Note that the word vu is a repetition for any factorization (u,v) of x = uv,
1170    so every factorization has at least one repetition.
1171
1172    If x is a string and (u, v) is a factorization for x, then a *local period* for
1173    (u, v) is an integer r such that there is some word w such that |w| = r and w is
1174    a repetition for (u, v).
1175
1176    We denote by local_period(u, v) the smallest local period of (u, v). We sometimes
1177    call this *the local period* of (u, v). Provided that x = uv is non-empty, this
1178    is well-defined (because each non-empty word has at least one factorization, as
1179    noted above).
1180
1181    It can be proven that the following is an equivalent definition of a local period
1182    for a factorization (u, v): any positive integer r such that x[i] == x[i+r] for
1183    all i such that |u| - r <= i <= |u| - 1 and such that both x[i] and x[i+r] are
1184    defined. (i.e., i > 0 and i + r < |x|).
1185
1186    Using the above reformulation, it is easy to prove that
1187
1188        1 <= local_period(u, v) <= period(uv)
1189
1190    A factorization (u, v) of x such that local_period(u,v) = period(x) is called a
1191    *critical factorization*.
1192
1193    The algorithm hinges on the following theorem, which is stated without proof:
1194
1195    **Critical Factorization Theorem** Any word x has at least one critical
1196    factorization (u, v) such that |u| < period(x).
1197
1198    The purpose of maximal_suffix is to find such a critical factorization.
1199
1200    If the period is short, compute another factorization x = u' v' to use
1201    for reverse search, chosen instead so that |v'| < period(x).
1202
1203*/
1204impl TwoWaySearcher {
1205    fn new(needle: &[u8], end: usize) -> TwoWaySearcher {
1206        let (crit_pos_false, period_false) = TwoWaySearcher::maximal_suffix(needle, false);
1207        let (crit_pos_true, period_true) = TwoWaySearcher::maximal_suffix(needle, true);
1208
1209        let (crit_pos, period) = if crit_pos_false > crit_pos_true {
1210            (crit_pos_false, period_false)
1211        } else {
1212            (crit_pos_true, period_true)
1213        };
1214
1215        // A particularly readable explanation of what's going on here can be found
1216        // in Crochemore and Rytter's book "Text Algorithms", ch 13. Specifically
1217        // see the code for "Algorithm CP" on p. 323.
1218        //
1219        // What's going on is we have some critical factorization (u, v) of the
1220        // needle, and we want to determine whether u is a suffix of
1221        // &v[..period]. If it is, we use "Algorithm CP1". Otherwise we use
1222        // "Algorithm CP2", which is optimized for when the period of the needle
1223        // is large.
1224        if needle[..crit_pos] == needle[period..period + crit_pos] {
1225            // short period case -- the period is exact
1226            // compute a separate critical factorization for the reversed needle
1227            // x = u' v' where |v'| < period(x).
1228            //
1229            // This is sped up by the period being known already.
1230            // Note that a case like x = "acba" may be factored exactly forwards
1231            // (crit_pos = 1, period = 3) while being factored with approximate
1232            // period in reverse (crit_pos = 2, period = 2). We use the given
1233            // reverse factorization but keep the exact period.
1234            let crit_pos_back = needle.len()
1235                - cmp::max(
1236                    TwoWaySearcher::reverse_maximal_suffix(needle, period, false),
1237                    TwoWaySearcher::reverse_maximal_suffix(needle, period, true),
1238                );
1239
1240            TwoWaySearcher {
1241                crit_pos,
1242                crit_pos_back,
1243                period,
1244                byteset: Self::byteset_create(&needle[..period]),
1245
1246                position: 0,
1247                end,
1248                memory: 0,
1249                memory_back: needle.len(),
1250            }
1251        } else {
1252            // long period case -- we have an approximation to the actual period,
1253            // and don't use memorization.
1254            //
1255            // Approximate the period by lower bound max(|u|, |v|) + 1.
1256            // The critical factorization is efficient to use for both forward and
1257            // reverse search.
1258
1259            TwoWaySearcher {
1260                crit_pos,
1261                crit_pos_back: crit_pos,
1262                period: cmp::max(crit_pos, needle.len() - crit_pos) + 1,
1263                byteset: Self::byteset_create(needle),
1264
1265                position: 0,
1266                end,
1267                memory: usize::MAX, // Dummy value to signify that the period is long
1268                memory_back: usize::MAX,
1269            }
1270        }
1271    }
1272
1273    #[inline]
1274    fn byteset_create(bytes: &[u8]) -> u64 {
1275        bytes.iter().fold(0, |a, &b| (1 << (b & 0x3f)) | a)
1276    }
1277
1278    #[inline]
1279    fn byteset_contains(&self, byte: u8) -> bool {
1280        (self.byteset >> ((byte & 0x3f) as usize)) & 1 != 0
1281    }
1282
1283    // One of the main ideas of Two-Way is that we factorize the needle into
1284    // two halves, (u, v), and begin trying to find v in the haystack by scanning
1285    // left to right. If v matches, we try to match u by scanning right to left.
1286    // How far we can jump when we encounter a mismatch is all based on the fact
1287    // that (u, v) is a critical factorization for the needle.
1288    #[inline]
1289    fn next<S>(&mut self, haystack: &[u8], needle: &[u8], long_period: bool) -> S::Output
1290    where
1291        S: TwoWayStrategy,
1292    {
1293        // `next()` uses `self.position` as its cursor
1294        let old_pos = self.position;
1295        let needle_last = needle.len() - 1;
1296        'search: loop {
1297            // Check that we have room to search in
1298            // position + needle_last can not overflow if we assume slices
1299            // are bounded by isize's range.
1300            let tail_byte = match haystack.get(self.position + needle_last) {
1301                Some(&b) => b,
1302                None => {
1303                    self.position = haystack.len();
1304                    return S::rejecting(old_pos, self.position);
1305                }
1306            };
1307
1308            if S::use_early_reject() && old_pos != self.position {
1309                return S::rejecting(old_pos, self.position);
1310            }
1311
1312            // Quickly skip by large portions unrelated to our substring
1313            if !self.byteset_contains(tail_byte) {
1314                self.position += needle.len();
1315                if !long_period {
1316                    self.memory = 0;
1317                }
1318                continue 'search;
1319            }
1320
1321            // See if the right part of the needle matches
1322            let start =
1323                if long_period { self.crit_pos } else { cmp::max(self.crit_pos, self.memory) };
1324            for i in start..needle.len() {
1325                if needle[i] != haystack[self.position + i] {
1326                    self.position += i - self.crit_pos + 1;
1327                    if !long_period {
1328                        self.memory = 0;
1329                    }
1330                    continue 'search;
1331                }
1332            }
1333
1334            // See if the left part of the needle matches
1335            let start = if long_period { 0 } else { self.memory };
1336            for i in (start..self.crit_pos).rev() {
1337                if needle[i] != haystack[self.position + i] {
1338                    self.position += self.period;
1339                    if !long_period {
1340                        self.memory = needle.len() - self.period;
1341                    }
1342                    continue 'search;
1343                }
1344            }
1345
1346            // We have found a match!
1347            let match_pos = self.position;
1348
1349            // Note: add self.period instead of needle.len() to have overlapping matches
1350            self.position += needle.len();
1351            if !long_period {
1352                self.memory = 0; // set to needle.len() - self.period for overlapping matches
1353            }
1354
1355            return S::matching(match_pos, match_pos + needle.len());
1356        }
1357    }
1358
1359    // Follows the ideas in `next()`.
1360    //
1361    // The definitions are symmetrical, with period(x) = period(reverse(x))
1362    // and local_period(u, v) = local_period(reverse(v), reverse(u)), so if (u, v)
1363    // is a critical factorization, so is (reverse(v), reverse(u)).
1364    //
1365    // For the reverse case we have computed a critical factorization x = u' v'
1366    // (field `crit_pos_back`). We need |u| < period(x) for the forward case and
1367    // thus |v'| < period(x) for the reverse.
1368    //
1369    // To search in reverse through the haystack, we search forward through
1370    // a reversed haystack with a reversed needle, matching first u' and then v'.
1371    #[inline]
1372    fn next_back<S>(&mut self, haystack: &[u8], needle: &[u8], long_period: bool) -> S::Output
1373    where
1374        S: TwoWayStrategy,
1375    {
1376        // `next_back()` uses `self.end` as its cursor -- so that `next()` and `next_back()`
1377        // are independent.
1378        let old_end = self.end;
1379        'search: loop {
1380            // Check that we have room to search in
1381            // end - needle.len() will wrap around when there is no more room,
1382            // but due to slice length limits it can never wrap all the way back
1383            // into the length of haystack.
1384            let front_byte = match haystack.get(self.end.wrapping_sub(needle.len())) {
1385                Some(&b) => b,
1386                None => {
1387                    self.end = 0;
1388                    return S::rejecting(0, old_end);
1389                }
1390            };
1391
1392            if S::use_early_reject() && old_end != self.end {
1393                return S::rejecting(self.end, old_end);
1394            }
1395
1396            // Quickly skip by large portions unrelated to our substring
1397            if !self.byteset_contains(front_byte) {
1398                self.end -= needle.len();
1399                if !long_period {
1400                    self.memory_back = needle.len();
1401                }
1402                continue 'search;
1403            }
1404
1405            // See if the left part of the needle matches
1406            let crit = if long_period {
1407                self.crit_pos_back
1408            } else {
1409                cmp::min(self.crit_pos_back, self.memory_back)
1410            };
1411            for i in (0..crit).rev() {
1412                if needle[i] != haystack[self.end - needle.len() + i] {
1413                    self.end -= self.crit_pos_back - i;
1414                    if !long_period {
1415                        self.memory_back = needle.len();
1416                    }
1417                    continue 'search;
1418                }
1419            }
1420
1421            // See if the right part of the needle matches
1422            let needle_end = if long_period { needle.len() } else { self.memory_back };
1423            for i in self.crit_pos_back..needle_end {
1424                if needle[i] != haystack[self.end - needle.len() + i] {
1425                    self.end -= self.period;
1426                    if !long_period {
1427                        self.memory_back = self.period;
1428                    }
1429                    continue 'search;
1430                }
1431            }
1432
1433            // We have found a match!
1434            let match_pos = self.end - needle.len();
1435            // Note: sub self.period instead of needle.len() to have overlapping matches
1436            self.end -= needle.len();
1437            if !long_period {
1438                self.memory_back = needle.len();
1439            }
1440
1441            return S::matching(match_pos, match_pos + needle.len());
1442        }
1443    }
1444
1445    // Compute the maximal suffix of `arr`.
1446    //
1447    // The maximal suffix is a possible critical factorization (u, v) of `arr`.
1448    //
1449    // Returns (`i`, `p`) where `i` is the starting index of v and `p` is the
1450    // period of v.
1451    //
1452    // `order_greater` determines if lexical order is `<` or `>`. Both
1453    // orders must be computed -- the ordering with the largest `i` gives
1454    // a critical factorization.
1455    //
1456    // For long period cases, the resulting period is not exact (it is too short).
1457    #[inline]
1458    fn maximal_suffix(arr: &[u8], order_greater: bool) -> (usize, usize) {
1459        let mut left = 0; // Corresponds to i in the paper
1460        let mut right = 1; // Corresponds to j in the paper
1461        let mut offset = 0; // Corresponds to k in the paper, but starting at 0
1462        // to match 0-based indexing.
1463        let mut period = 1; // Corresponds to p in the paper
1464
1465        while let Some(&a) = arr.get(right + offset) {
1466            // `left` will be inbounds when `right` is.
1467            let b = arr[left + offset];
1468            if (a < b && !order_greater) || (a > b && order_greater) {
1469                // Suffix is smaller, period is entire prefix so far.
1470                right += offset + 1;
1471                offset = 0;
1472                period = right - left;
1473            } else if a == b {
1474                // Advance through repetition of the current period.
1475                if offset + 1 == period {
1476                    right += offset + 1;
1477                    offset = 0;
1478                } else {
1479                    offset += 1;
1480                }
1481            } else {
1482                // Suffix is larger, start over from current location.
1483                left = right;
1484                right += 1;
1485                offset = 0;
1486                period = 1;
1487            }
1488        }
1489        (left, period)
1490    }
1491
1492    // Compute the maximal suffix of the reverse of `arr`.
1493    //
1494    // The maximal suffix is a possible critical factorization (u', v') of `arr`.
1495    //
1496    // Returns `i` where `i` is the starting index of v', from the back;
1497    // returns immediately when a period of `known_period` is reached.
1498    //
1499    // `order_greater` determines if lexical order is `<` or `>`. Both
1500    // orders must be computed -- the ordering with the largest `i` gives
1501    // a critical factorization.
1502    //
1503    // For long period cases, the resulting period is not exact (it is too short).
1504    fn reverse_maximal_suffix(arr: &[u8], known_period: usize, order_greater: bool) -> usize {
1505        let mut left = 0; // Corresponds to i in the paper
1506        let mut right = 1; // Corresponds to j in the paper
1507        let mut offset = 0; // Corresponds to k in the paper, but starting at 0
1508        // to match 0-based indexing.
1509        let mut period = 1; // Corresponds to p in the paper
1510        let n = arr.len();
1511
1512        while right + offset < n {
1513            let a = arr[n - (1 + right + offset)];
1514            let b = arr[n - (1 + left + offset)];
1515            if (a < b && !order_greater) || (a > b && order_greater) {
1516                // Suffix is smaller, period is entire prefix so far.
1517                right += offset + 1;
1518                offset = 0;
1519                period = right - left;
1520            } else if a == b {
1521                // Advance through repetition of the current period.
1522                if offset + 1 == period {
1523                    right += offset + 1;
1524                    offset = 0;
1525                } else {
1526                    offset += 1;
1527                }
1528            } else {
1529                // Suffix is larger, start over from current location.
1530                left = right;
1531                right += 1;
1532                offset = 0;
1533                period = 1;
1534            }
1535            if period == known_period {
1536                break;
1537            }
1538        }
1539        debug_assert!(period <= known_period);
1540        left
1541    }
1542}
1543
1544// TwoWayStrategy allows the algorithm to either skip non-matches as quickly
1545// as possible, or to work in a mode where it emits Rejects relatively quickly.
1546trait TwoWayStrategy {
1547    type Output;
1548    fn use_early_reject() -> bool;
1549    fn rejecting(a: usize, b: usize) -> Self::Output;
1550    fn matching(a: usize, b: usize) -> Self::Output;
1551}
1552
1553/// Skip to match intervals as quickly as possible
1554enum MatchOnly {}
1555
1556impl TwoWayStrategy for MatchOnly {
1557    type Output = Option<(usize, usize)>;
1558
1559    #[inline]
1560    fn use_early_reject() -> bool {
1561        false
1562    }
1563    #[inline]
1564    fn rejecting(_a: usize, _b: usize) -> Self::Output {
1565        None
1566    }
1567    #[inline]
1568    fn matching(a: usize, b: usize) -> Self::Output {
1569        Some((a, b))
1570    }
1571}
1572
1573/// Emit Rejects regularly
1574enum RejectAndMatch {}
1575
1576impl TwoWayStrategy for RejectAndMatch {
1577    type Output = SearchStep;
1578
1579    #[inline]
1580    fn use_early_reject() -> bool {
1581        true
1582    }
1583    #[inline]
1584    fn rejecting(a: usize, b: usize) -> Self::Output {
1585        SearchStep::Reject(a, b)
1586    }
1587    #[inline]
1588    fn matching(a: usize, b: usize) -> Self::Output {
1589        SearchStep::Match(a, b)
1590    }
1591}