LCOV - code coverage report
Current view: top level - source/str_view.c (source / functions) Coverage Total Hit
Test: SV Test Suite Coverage Report Lines: 90.9 % 803 730
Test Date: 2026-04-04 23:27:27 Functions: 97.4 % 76 74

            Line data    Source code
       1              : /* Author: Alexander G. Lopez
       2              :    ==========================
       3              :    This file implements the SV_Str_view interface as an interpretation of C++
       4              :    string_view type. There are some minor differences and C flavor thrown
       5              :    in. Additionally, there is a provided reimplementation of the Two-Way
       6              :    String-Searching algorithm, similar to glibc. */
       7              : #include "str_view.h"
       8              : 
       9              : #include <stdbool.h>
      10              : #include <stddef.h>
      11              : #include <stdint.h>
      12              : #include <string.h>
      13              : 
      14              : /* Clang and GCC support static array parameter declarations while
      15              :    MSVC does not. This is how to solve the differing declaration
      16              :    signature requirements. */
      17              : #if defined(__GNUC__) || defined(__clang__) || defined(__INTEL_LLVM_COMPILER)
      18              : /* A static array parameter declaration helper. Function parameters
      19              :    may specify an array of a type of at least SIZE elements large,
      20              :    allowing compiler optimizations and safety errors. Specify
      21              :    a parameter such as `void func(int size, ARR_GEQ(arr, size))`. */
      22              : #    define ARR_GEQ(str, size) str[static(size)]
      23              : /* A static array parameter declaration helper. Function parameters
      24              :    may specify an array of a type of at least SIZE elements large,
      25              :    allowing compiler optimizations and safety errors. Specify
      26              :    a parameter such as `void func(int size, int ARR_GEQ(arr,size))`.
      27              :    This declarations adds the additional constraint that the pointer
      28              :    to the beginning of the array of types will not move. */
      29              : #    define ARR_CONST_GEQ(str, size) str[static const(size)]
      30              : #else
      31              : /* Dummy macro for MSVC compatibility. Specifies a function parameter shall
      32              :    have at least one element. Compiler warnings may differ from GCC/Clang. */
      33              : #    define ARR_GEQ(str, size) *str
      34              : /* Dummy macro for MSVC compatibility. Specifies a function parameter shall
      35              :    have at least one element. MSVC does not allow specification of a const
      36              :    pointer to the beginning of an array function parameter when using array
      37              :    size parameter syntax. Compiler warnings may differ from GCC/Clang. */
      38              : #    define ARR_CONST_GEQ(str, size) *const str
      39              : #endif
      40              : 
      41              : /* ========================   Type Definitions   =========================== */
      42              : 
      43              : /* Return the factorization step of two-way search in pre-compute phase. */
      44              : struct Factorization {
      45              :     /* Position in the needle at which (local period = period). */
      46              :     ptrdiff_t critical_position;
      47              :     /* A distance in the needle such that two letters always coincide. */
      48              :     ptrdiff_t period_distance;
      49              : };
      50              : 
      51              : /* Avoid giving the user a chance to dereference null as much as possible
      52              :    by returning this for various edge cases when it makes sense to communicate
      53              :    empty, null, invalid, not found etc. Used on cases by case basis.
      54              :    The function interfaces protect us from null pointers but not always. */
      55              : static SV_Str_view const nil = {
      56              :     .str = "",
      57              :     .len = 0,
      58              : };
      59              : 
      60              : /* =========================   Prototypes   =============================== */
      61              : 
      62              : static size_t after_find(SV_Str_view, SV_Str_view);
      63              : static size_t before_reverse_find(SV_Str_view, SV_Str_view);
      64              : static size_t min(size_t, size_t);
      65              : static SV_Order char_compare(char, char);
      66              : static ptrdiff_t signed_max(ptrdiff_t, ptrdiff_t);
      67              : 
      68              : /* Once the user facing API has verified the lengths of strings provided to
      69              :    views as inputs, internal code can take advantage of compiler optimizations
      70              :    by assuming the strings are GREATER than or EQUAL TO certain lengths
      71              :    allowing for processing by larger units than 1 in compiled code. */
      72              : 
      73              : static size_t position_memoized(ptrdiff_t haystack_size,
      74              :                                 char const ARR_GEQ(, haystack_size),
      75              :                                 ptrdiff_t needle_size,
      76              :                                 char const ARR_GEQ(, needle_size), ptrdiff_t,
      77              :                                 ptrdiff_t);
      78              : static size_t position_normal(ptrdiff_t haystack_size,
      79              :                               char const ARR_GEQ(, haystack_size),
      80              :                               ptrdiff_t needle_size,
      81              :                               char const ARR_GEQ(, needle_size), ptrdiff_t,
      82              :                               ptrdiff_t);
      83              : static size_t reverse_position_memoized(ptrdiff_t haystack_size,
      84              :                                         char const ARR_GEQ(, haystack_size),
      85              :                                         ptrdiff_t needle_size,
      86              :                                         char const ARR_GEQ(, needle_size),
      87              :                                         ptrdiff_t, ptrdiff_t);
      88              : static size_t reverse_position_normal(ptrdiff_t haystack_size,
      89              :                                       char const ARR_GEQ(, haystack_size),
      90              :                                       ptrdiff_t needle_size,
      91              :                                       char const ARR_GEQ(, needle_size),
      92              :                                       ptrdiff_t, ptrdiff_t);
      93              : static size_t two_way_match(ptrdiff_t haystack_size,
      94              :                             char const ARR_GEQ(, haystack_size),
      95              :                             ptrdiff_t needle_size,
      96              :                             char const ARR_GEQ(, needle_size));
      97              : static size_t two_way_reverse_match(ptrdiff_t haystack_size,
      98              :                                     char const ARR_GEQ(, haystack_size),
      99              :                                     ptrdiff_t needle_size,
     100              :                                     char const ARR_GEQ(, needle_size));
     101              : static struct Factorization maximal_suffix(ptrdiff_t needle_size,
     102              :                                            char const ARR_GEQ(, needle_size));
     103              : static struct Factorization
     104              : maximal_suffix_reverse(ptrdiff_t needle_size,
     105              :                        char const ARR_GEQ(, needle_size));
     106              : static struct Factorization
     107              : reverse_maximal_suffix(ptrdiff_t needle_size,
     108              :                        char const ARR_GEQ(, needle_size));
     109              : static struct Factorization
     110              : reverse_maximal_suffix_reverse(ptrdiff_t needle_size,
     111              :                                char const ARR_GEQ(, needle_size));
     112              : static size_t two_byte_view_match(size_t haystack_size,
     113              :                                   unsigned char const ARR_GEQ(, haystack_size),
     114              :                                   size_t n_size,
     115              :                                   unsigned char const ARR_GEQ(, n_size));
     116              : static size_t three_byte_view_match(size_t size,
     117              :                                     unsigned char const ARR_GEQ(, size),
     118              :                                     size_t n_size,
     119              :                                     unsigned char const ARR_GEQ(, n_size));
     120              : static size_t four_byte_view_match(size_t size,
     121              :                                    unsigned char const ARR_GEQ(, size),
     122              :                                    size_t n_size,
     123              :                                    unsigned char const ARR_GEQ(, n_size));
     124              : static size_t view_span_in_set_complement_length(
     125              :     size_t str_size, char const ARR_GEQ(, str_size), size_t set_size,
     126              :     char const ARR_GEQ(, set_size));
     127              : static size_t view_span_in_set_length(size_t str_size,
     128              :                                       char const ARR_GEQ(, str_size),
     129              :                                       size_t set_size,
     130              :                                       char const ARR_GEQ(, set_size));
     131              : static size_t view_match(ptrdiff_t haystack_size,
     132              :                          char const ARR_GEQ(, haystack_size),
     133              :                          ptrdiff_t needle_size,
     134              :                          char const ARR_GEQ(, needle_size));
     135              : static size_t view_match_char(size_t n, char const ARR_GEQ(, n), char);
     136              : static size_t reverse_view_match_char(size_t n, char const ARR_GEQ(, n), char);
     137              : static size_t reverse_view_match(ptrdiff_t haystack_size,
     138              :                                  char const ARR_GEQ(, haystack_size),
     139              :                                  ptrdiff_t needle_size,
     140              :                                  char const ARR_GEQ(, needle_size));
     141              : static size_t
     142              : reverse_two_byte_view_match(size_t size, unsigned char const ARR_GEQ(, size),
     143              :                             size_t n_size,
     144              :                             unsigned char const ARR_GEQ(, n_size));
     145              : static size_t
     146              : reverse_three_byte_view_match(size_t size, unsigned char const ARR_GEQ(, size),
     147              :                               size_t n_size,
     148              :                               unsigned char const ARR_GEQ(, n_size));
     149              : static size_t
     150              : reverse_four_byte_view_match(size_t size, unsigned char const ARR_GEQ(, size),
     151              :                              size_t n_size,
     152              :                              unsigned char const ARR_GEQ(, n_size));
     153              : 
     154              : /* ===================   Interface Implementation   ====================== */
     155              : 
     156              : SV_Str_view
     157          181 : SV_from_terminated(char const *const str) {
     158          181 :     if (!str) {
     159            0 :         return nil;
     160              :     }
     161          543 :     return (SV_Str_view){
     162          181 :         .str = str,
     163          181 :         .len = strlen(str),
     164              :     };
     165          181 : }
     166              : 
     167              : SV_Str_view
     168           11 : SV_from_view(size_t n, char const *const str) {
     169           11 :     if (!str) {
     170            0 :         return nil;
     171              :     }
     172           33 :     return (SV_Str_view){
     173           11 :         .str = str,
     174           11 :         .len = strnlen(str, n),
     175              :     };
     176           11 : }
     177              : 
     178              : SV_Str_view
     179           16 : SV_from_delimiter(char const *const str, char const *const delim) {
     180           16 :     if (!str) {
     181            0 :         return nil;
     182              :     }
     183           16 :     if (!delim) {
     184            0 :         return (SV_Str_view){
     185            0 :             .str = str,
     186            0 :             .len = strlen(str),
     187              :         };
     188              :     }
     189           16 :     return SV_token_begin(
     190           48 :         (SV_Str_view){
     191           16 :             .str = str,
     192           16 :             .len = strlen(str),
     193              :         },
     194           48 :         (SV_Str_view){
     195           16 :             .str = delim,
     196           16 :             .len = strlen(delim),
     197              :         });
     198           16 : }
     199              : 
     200              : SV_Str_view
     201            1 : SV_copy(size_t const str_bytes, char const *const src_str) {
     202            1 :     return SV_from_view(str_bytes, src_str);
     203            1 : }
     204              : 
     205              : size_t
     206           24 : SV_fill(size_t const dest_bytes, char *const dest_buf, SV_Str_view const src) {
     207           24 :     if (!dest_buf || !dest_bytes || !src.str || !src.len) {
     208            0 :         return 0;
     209              :     }
     210           24 :     size_t const bytes = min(dest_bytes, SV_bytes(src));
     211           24 :     memmove(dest_buf, src.str, bytes);
     212           24 :     dest_buf[bytes - 1] = '\0';
     213           24 :     return bytes;
     214           24 : }
     215              : 
     216              : bool
     217          110 : SV_is_empty(SV_Str_view const sv) {
     218          110 :     return !sv.len;
     219              : }
     220              : 
     221              : size_t
     222          491 : SV_len(SV_Str_view const sv) {
     223          491 :     return sv.len;
     224              : }
     225              : 
     226              : size_t
     227           39 : SV_bytes(SV_Str_view const sv) {
     228           39 :     return sv.len + 1;
     229              : }
     230              : 
     231              : size_t
     232            5 : SV_str_bytes(char const *const str) {
     233            5 :     if (!str) {
     234            0 :         return 0;
     235              :     }
     236            5 :     return strlen(str) + 1;
     237            5 : }
     238              : 
     239              : size_t
     240            4 : SV_min_len(char const *const str, size_t n) {
     241            4 :     return strnlen(str, n);
     242              : }
     243              : 
     244              : char
     245           33 : SV_at(SV_Str_view const sv, size_t const i) {
     246           33 :     if (i >= sv.len) {
     247            1 :         return *nil.str;
     248              :     }
     249           32 :     return sv.str[i];
     250           33 : }
     251              : 
     252              : char const *
     253            1 : SV_null(void) {
     254            1 :     return nil.str;
     255              : }
     256              : 
     257              : SV_Order
     258          153 : SV_compare(SV_Str_view const lhs, SV_Str_view const rhs) {
     259          153 :     if (!lhs.str || !rhs.str) {
     260            0 :         return SV_ORDER_ERROR;
     261              :     }
     262          153 :     size_t const size = min(lhs.len, rhs.len);
     263          153 :     size_t i = 0;
     264         1082 :     for (; i < size && lhs.str[i] == rhs.str[i]; ++i) {}
     265          153 :     if (i == lhs.len && i == rhs.len) {
     266          141 :         return SV_ORDER_EQUAL;
     267              :     }
     268           12 :     if (i < lhs.len && i < rhs.len) {
     269            9 :         return (uint8_t)lhs.str[i] < (uint8_t)rhs.str[i] ? SV_ORDER_LESSER
     270              :                                                          : SV_ORDER_GREATER;
     271              :     }
     272            3 :     return (i < lhs.len) ? SV_ORDER_GREATER : SV_ORDER_LESSER;
     273          153 : }
     274              : 
     275              : SV_Order
     276          241 : SV_terminated_compare(SV_Str_view const lhs, char const *const rhs) {
     277          241 :     if (!lhs.str || !rhs) {
     278            0 :         return SV_ORDER_ERROR;
     279              :     }
     280          241 :     size_t const size = lhs.len;
     281          241 :     size_t i = 0;
     282         2004 :     for (; i < size && rhs[i] && lhs.str[i] == rhs[i]; ++i) {}
     283          241 :     if (i == lhs.len && !rhs[i]) {
     284          222 :         return SV_ORDER_EQUAL;
     285              :     }
     286           19 :     if (i < lhs.len && rhs[i]) {
     287            9 :         return (uint8_t)lhs.str[i] < (uint8_t)rhs[i] ? SV_ORDER_LESSER
     288              :                                                      : SV_ORDER_GREATER;
     289              :     }
     290           10 :     return (i < lhs.len) ? SV_ORDER_GREATER : SV_ORDER_LESSER;
     291          241 : }
     292              : 
     293              : SV_Order
     294            2 : SV_view_compare(SV_Str_view const lhs, char const *const rhs, size_t const n) {
     295            2 :     if (!lhs.str || !rhs) {
     296            0 :         return SV_ORDER_ERROR;
     297              :     }
     298            2 :     size_t const size = min(lhs.len, n);
     299            2 :     size_t i = 0;
     300            8 :     for (; i < size && rhs[i] && lhs.str[i] == rhs[i]; ++i) {}
     301            2 :     if (i == lhs.len && size == n) {
     302            0 :         return SV_ORDER_EQUAL;
     303              :     }
     304              :     /* strncmp compares the first at most n bytes inclusive */
     305            2 :     if (i < lhs.len && size <= n) {
     306            2 :         return (uint8_t)lhs.str[i] < (uint8_t)rhs[i] ? SV_ORDER_LESSER
     307              :                                                      : SV_ORDER_GREATER;
     308              :     }
     309            0 :     return (i < lhs.len) ? SV_ORDER_GREATER : SV_ORDER_LESSER;
     310            2 : }
     311              : 
     312              : char
     313           89 : SV_front(SV_Str_view const sv) {
     314           89 :     if (!sv.str || !sv.len) {
     315           12 :         return *nil.str;
     316              :     }
     317           77 :     return *sv.str;
     318           89 : }
     319              : 
     320              : char
     321            3 : SV_back(SV_Str_view const sv) {
     322            3 :     if (!sv.str || !sv.len) {
     323            1 :         return *nil.str;
     324              :     }
     325            2 :     return sv.str[sv.len - 1];
     326            3 : }
     327              : 
     328              : char const *
     329          159 : SV_begin(SV_Str_view const sv) {
     330          159 :     if (!sv.str) {
     331            0 :         return nil.str;
     332              :     }
     333          159 :     return sv.str;
     334          159 : }
     335              : 
     336              : char const *
     337          136 : SV_end(SV_Str_view const sv) {
     338          136 :     if (!sv.str || sv.str == nil.str) {
     339            0 :         return nil.str;
     340              :     }
     341          136 :     return sv.str + sv.len;
     342          136 : }
     343              : 
     344              : char const *
     345          100 : SV_next(char const *c) {
     346          100 :     if (!c) {
     347            0 :         return nil.str;
     348              :     }
     349          100 :     return ++c;
     350          100 : }
     351              : 
     352              : char const *
     353            1 : SV_reverse_begin(SV_Str_view const sv) {
     354            1 :     if (!sv.str) {
     355            0 :         return nil.str;
     356              :     }
     357            1 :     if (!sv.len) {
     358            0 :         return sv.str;
     359              :     }
     360            1 :     return sv.str + sv.len - 1;
     361            1 : }
     362              : 
     363              : char const *
     364           31 : SV_reverse_end(SV_Str_view const sv) {
     365           31 :     if (!sv.str || sv.str == nil.str) {
     366            0 :         return nil.str;
     367              :     }
     368           31 :     if (!sv.len) {
     369            0 :         return sv.str;
     370              :     }
     371           31 :     return sv.str - 1;
     372           31 : }
     373              : 
     374              : char const *
     375           31 : SV_reverse_next(char const *c) {
     376           31 :     if (!c) {
     377            0 :         return nil.str;
     378              :     }
     379           31 :     return --c;
     380           31 : }
     381              : 
     382              : char const *
     383           48 : SV_pointer(SV_Str_view const sv, size_t const i) {
     384           48 :     if (!sv.str) {
     385            0 :         return nil.str;
     386              :     }
     387           48 :     if (i > sv.len) {
     388            1 :         return SV_end(sv);
     389              :     }
     390           47 :     return sv.str + i;
     391           48 : }
     392              : 
     393              : SV_Str_view
     394           70 : SV_token_begin(SV_Str_view src, SV_Str_view const delim) {
     395           70 :     if (!src.str) {
     396            0 :         return nil;
     397              :     }
     398           70 :     if (!delim.str) {
     399            0 :         return (SV_Str_view){.str = src.str + src.len, 0};
     400              :     }
     401           70 :     char const *const begin = src.str;
     402           70 :     size_t const sv_not = after_find(src, delim);
     403           70 :     src.str += sv_not;
     404           70 :     if (begin + src.len == src.str) {
     405            2 :         return (SV_Str_view){
     406            1 :             .str = src.str,
     407              :             .len = 0,
     408              :         };
     409              :     }
     410           69 :     src.len -= sv_not;
     411          207 :     return (SV_Str_view){
     412           69 :         .str = src.str,
     413           69 :         .len = SV_find(src, 0, delim),
     414              :     };
     415           70 : }
     416              : 
     417              : bool
     418          229 : SV_token_end(SV_Str_view const src, SV_Str_view const token) {
     419          229 :     return !token.len || token.str >= (src.str + src.len);
     420              : }
     421              : 
     422              : SV_Str_view
     423          177 : SV_token_next(SV_Str_view const src, SV_Str_view const token,
     424              :               SV_Str_view const delim) {
     425          177 :     if (!token.str) {
     426            0 :         return nil;
     427              :     }
     428          177 :     if (!delim.str || !token.str || !token.str[token.len]) {
     429           52 :         return (SV_Str_view){
     430           26 :             .str = &token.str[token.len],
     431              :             .len = 0,
     432              :         };
     433              :     }
     434          302 :     SV_Str_view next = {
     435          151 :         .str = &token.str[token.len] + delim.len,
     436              :     };
     437          151 :     if (next.str >= &src.str[src.len]) {
     438           54 :         return (SV_Str_view){
     439           27 :             .str = &src.str[src.len],
     440              :             .len = 0,
     441              :         };
     442              :     }
     443          124 :     next.len = (size_t)(&src.str[src.len] - next.str);
     444              :     /* There is a cheap easy way to skip repeating delimiters before the
     445              :        next search that should be faster than string comparison. */
     446          124 :     size_t const after_delim = after_find(next, delim);
     447          124 :     next.str += after_delim;
     448          124 :     next.len -= after_delim;
     449          124 :     if (next.str >= &src.str[src.len]) {
     450            2 :         return (SV_Str_view){
     451            1 :             .str = &src.str[src.len],
     452              :             .len = 0,
     453              :         };
     454              :     }
     455          246 :     size_t const found = view_match((ptrdiff_t)next.len, next.str,
     456          123 :                                     (ptrdiff_t)delim.len, delim.str);
     457          369 :     return (SV_Str_view){
     458          123 :         .str = next.str,
     459          123 :         .len = found,
     460              :     };
     461          177 : }
     462              : 
     463              : SV_Str_view
     464           53 : SV_token_reverse_begin(SV_Str_view src, SV_Str_view const delim) {
     465           53 :     if (!src.str) {
     466            0 :         return nil;
     467              :     }
     468           53 :     if (!delim.str) {
     469            0 :         return (SV_Str_view){
     470            0 :             .str = src.str + src.len,
     471              :             0,
     472              :         };
     473              :     }
     474           53 :     size_t before_delim = before_reverse_find(src, delim);
     475           53 :     src.len = min(src.len, before_delim + 1);
     476           53 :     size_t start = SV_reverse_find(src, src.len, delim);
     477           53 :     if (start == src.len) {
     478           11 :         return src;
     479              :     }
     480           42 :     start += delim.len;
     481          126 :     return (SV_Str_view){
     482           42 :         .str = src.str + start,
     483           42 :         .len = before_delim - start + 1,
     484              :     };
     485           53 : }
     486              : 
     487              : SV_Str_view
     488          180 : SV_token_reverse_next(SV_Str_view const src, SV_Str_view const token,
     489              :                       SV_Str_view const delim) {
     490          180 :     if (!token.str) {
     491            0 :         return nil;
     492              :     }
     493          180 :     if (!token.len | !delim.str || token.str == src.str
     494          180 :         || token.str - delim.len <= src.str) {
     495          106 :         return (SV_Str_view){
     496           53 :             .str = src.str,
     497              :             .len = 0,
     498              :         };
     499              :     }
     500          381 :     SV_Str_view const shorter = {
     501          127 :         .str = src.str,
     502          127 :         .len = (size_t)((token.str - delim.len) - src.str),
     503              :     };
     504              :     /* Same as in the forward version, this method is a quick way to skip
     505              :        any number of repeating delimiters before starting the next search
     506              :        for a delimiter before a token. */
     507          127 :     size_t const before_delim = before_reverse_find(shorter, delim);
     508          127 :     if (before_delim == shorter.len) {
     509            8 :         return shorter;
     510              :     }
     511          238 :     size_t start = reverse_view_match((ptrdiff_t)before_delim, shorter.str,
     512          119 :                                       (ptrdiff_t)delim.len, delim.str);
     513          119 :     if (start == before_delim) {
     514           24 :         return (SV_Str_view){
     515            8 :             .str = shorter.str,
     516            8 :             .len = before_delim + 1,
     517              :         };
     518              :     }
     519          111 :     start += delim.len;
     520          333 :     return (SV_Str_view){
     521          111 :         .str = src.str + start,
     522          111 :         .len = before_delim - start + 1,
     523              :     };
     524          180 : }
     525              : 
     526              : bool
     527          231 : SV_token_reverse_end(SV_Str_view const src, SV_Str_view const token) {
     528          231 :     return !token.len && token.str == src.str;
     529              : }
     530              : 
     531              : SV_Str_view
     532            1 : SV_extend(SV_Str_view sv) {
     533            1 :     if (!sv.str) {
     534            0 :         return nil;
     535              :     }
     536            1 :     char const *i = sv.str;
     537           35 :     while (*i++) {}
     538            1 :     sv.len = (size_t)(i - sv.str - 1);
     539            1 :     return sv;
     540            1 : }
     541              : 
     542              : bool
     543           12 : SV_starts_with(SV_Str_view const sv, SV_Str_view const prefix) {
     544           12 :     if (prefix.len > sv.len) {
     545            2 :         return false;
     546              :     }
     547           10 :     return SV_compare(SV_substr(sv, 0, prefix.len), prefix) == SV_ORDER_EQUAL;
     548           12 : }
     549              : 
     550              : SV_Str_view
     551           12 : SV_remove_prefix(SV_Str_view const sv, size_t const n) {
     552           12 :     size_t const remove = min(sv.len, n);
     553           36 :     return (SV_Str_view){
     554           12 :         .str = sv.str + remove,
     555           12 :         .len = sv.len - remove,
     556              :     };
     557           12 : }
     558              : 
     559              : bool
     560            0 : SV_ends_with(SV_Str_view const sv, SV_Str_view const suffix) {
     561            0 :     if (suffix.len > sv.len) {
     562            0 :         return false;
     563              :     }
     564            0 :     return SV_compare(SV_substr(sv, sv.len - suffix.len, suffix.len), suffix)
     565            0 :         == SV_ORDER_EQUAL;
     566            0 : }
     567              : 
     568              : SV_Str_view
     569           12 : SV_remove_suffix(SV_Str_view const sv, size_t const n) {
     570           12 :     if (!sv.str) {
     571            0 :         return nil;
     572              :     }
     573           36 :     return (SV_Str_view){
     574           12 :         .str = sv.str,
     575           12 :         .len = sv.len - min(sv.len, n),
     576              :     };
     577           12 : }
     578              : 
     579              : SV_Str_view
     580           29 : SV_substr(SV_Str_view const sv, size_t const pos, size_t const count) {
     581           29 :     if (pos > sv.len) {
     582            2 :         return (SV_Str_view){
     583            1 :             .str = sv.str + sv.len,
     584              :             .len = 0,
     585              :         };
     586              :     }
     587           84 :     return (SV_Str_view){
     588           28 :         .str = sv.str + pos,
     589           28 :         .len = min(count, sv.len - pos),
     590              :     };
     591           29 : }
     592              : 
     593              : bool
     594            0 : SV_contains(SV_Str_view const haystack, SV_Str_view const needle) {
     595            0 :     if (needle.len > haystack.len) {
     596            0 :         return false;
     597              :     }
     598            0 :     if (SV_is_empty(haystack)) {
     599            0 :         return false;
     600              :     }
     601            0 :     if (SV_is_empty(needle)) {
     602            0 :         return true;
     603              :     }
     604            0 :     return haystack.len
     605            0 :         != view_match((ptrdiff_t)haystack.len, haystack.str,
     606            0 :                       (ptrdiff_t)needle.len, needle.str);
     607            0 : }
     608              : 
     609              : SV_Str_view
     610           19 : SV_match(SV_Str_view const haystack, SV_Str_view const needle) {
     611           19 :     if (!haystack.str || !needle.str) {
     612            0 :         return nil;
     613              :     }
     614           19 :     if (needle.len > haystack.len || SV_is_empty(haystack)
     615           19 :         || SV_is_empty(needle)) {
     616            0 :         return (SV_Str_view){
     617            0 :             .str = haystack.str + haystack.len,
     618              :             .len = 0,
     619              :         };
     620              :     }
     621           38 :     size_t const found = view_match((ptrdiff_t)haystack.len, haystack.str,
     622           19 :                                     (ptrdiff_t)needle.len, needle.str);
     623           19 :     if (found == haystack.len) {
     624           10 :         return (SV_Str_view){
     625            5 :             .str = haystack.str + haystack.len,
     626              :             .len = 0,
     627              :         };
     628              :     }
     629           42 :     return (SV_Str_view){
     630           14 :         .str = haystack.str + found,
     631           14 :         .len = needle.len,
     632              :     };
     633           19 : }
     634              : 
     635              : SV_Str_view
     636           15 : SV_reverse_match(SV_Str_view const haystack, SV_Str_view const needle) {
     637           15 :     if (!haystack.str) {
     638            0 :         return nil;
     639              :     }
     640           15 :     if (SV_is_empty(haystack) || SV_is_empty(needle)) {
     641            0 :         return (SV_Str_view){
     642            0 :             .str = haystack.str + haystack.len,
     643              :             .len = 0,
     644              :         };
     645              :     }
     646           30 :     size_t const found
     647           30 :         = reverse_view_match((ptrdiff_t)haystack.len, haystack.str,
     648           15 :                              (ptrdiff_t)needle.len, needle.str);
     649           15 :     if (found == haystack.len) {
     650            5 :         return (SV_Str_view){.str = haystack.str + haystack.len, .len = 0};
     651              :     }
     652           30 :     return (SV_Str_view){
     653           10 :         .str = haystack.str + found,
     654           10 :         .len = needle.len,
     655              :     };
     656           15 : }
     657              : 
     658              : size_t
     659           85 : SV_find(SV_Str_view const haystack, size_t const pos,
     660              :         SV_Str_view const needle) {
     661           85 :     if (needle.len > haystack.len || pos > haystack.len) {
     662            7 :         return haystack.len;
     663              :     }
     664          156 :     return pos
     665          156 :          + view_match((ptrdiff_t)(haystack.len - pos), haystack.str + pos,
     666           78 :                       (ptrdiff_t)needle.len, needle.str);
     667           85 : }
     668              : 
     669              : size_t
     670           97 : SV_reverse_find(SV_Str_view const h, size_t pos, SV_Str_view const n) {
     671           97 :     if (!h.len || n.len > h.len) {
     672            5 :         return h.len;
     673              :     }
     674           92 :     if (pos >= h.len) {
     675           89 :         pos = h.len - 1;
     676           89 :     }
     677          184 :     size_t const found = reverse_view_match((ptrdiff_t)pos + 1, h.str,
     678           92 :                                             (ptrdiff_t)n.len, n.str);
     679           92 :     return found == pos + 1 ? h.len : found;
     680           97 : }
     681              : 
     682              : size_t
     683           12 : SV_find_first_of(SV_Str_view const haystack, SV_Str_view const set) {
     684           12 :     if (!haystack.str || !haystack.len) {
     685            0 :         return 0;
     686              :     }
     687           12 :     if (!set.str || !set.len) {
     688            1 :         return haystack.len;
     689              :     }
     690           22 :     return view_span_in_set_complement_length(haystack.len, haystack.str,
     691           11 :                                               set.len, set.str);
     692           12 : }
     693              : 
     694              : size_t
     695           11 : SV_find_last_of(SV_Str_view const haystack, SV_Str_view const set) {
     696           11 :     if (!haystack.str || !haystack.len) {
     697            0 :         return 0;
     698              :     }
     699           11 :     if (!set.str || !set.len) {
     700            0 :         return haystack.len;
     701              :     }
     702              :     /* It may be tempting to go right to left but consider if that really
     703              :        would be reliably faster across every possible string one encounters.
     704              :        The last occurrence of a set char could be anywhere in the string. */
     705           11 :     size_t last_pos = haystack.len;
     706          202 :     for (size_t in = 0, prev = 0;
     707          404 :          (in += view_span_in_set_length(haystack.len - in, haystack.str + in,
     708          202 :                                         set.len, set.str))
     709          202 :          != haystack.len;
     710          191 :          ++in, prev = in) {
     711          191 :         if (in != prev) {
     712           54 :             last_pos = in - 1;
     713           54 :         }
     714          191 :     }
     715           11 :     return last_pos;
     716           11 : }
     717              : 
     718              : size_t
     719            1 : SV_find_first_not_of(SV_Str_view const haystack, SV_Str_view const set) {
     720            1 :     if (!haystack.str || !haystack.len) {
     721            0 :         return 0;
     722              :     }
     723            1 :     if (!set.str || !set.len) {
     724            0 :         return 0;
     725              :     }
     726            2 :     return view_span_in_set_length(haystack.len, haystack.str, set.len,
     727            1 :                                    set.str);
     728            1 : }
     729              : 
     730              : size_t
     731            8 : SV_find_last_not_of(SV_Str_view const haystack, SV_Str_view const set) {
     732            8 :     if (!haystack.str || !haystack.len) {
     733            0 :         return 0;
     734              :     }
     735            8 :     if (!set.str || !set.len) {
     736            0 :         return haystack.len - 1;
     737              :     }
     738            8 :     size_t last_pos = haystack.len;
     739          110 :     for (size_t in = 0, prev = 0;
     740          220 :          (in += view_span_in_set_length(haystack.len - in, haystack.str + in,
     741          110 :                                         set.len, set.str))
     742          110 :          != haystack.len;
     743          102 :          ++in, prev = in) {
     744          102 :         if (in != prev) {
     745           21 :             last_pos = in;
     746           21 :         }
     747          102 :     }
     748            8 :     if (last_pos == haystack.len) {
     749            1 :         return haystack.len;
     750              :     }
     751           14 :     size_t const one_past_last_of = view_span_in_set_complement_length(
     752            7 :         haystack.len - last_pos, haystack.str + last_pos, set.len, set.str);
     753            7 :     return last_pos + one_past_last_of - 1;
     754            8 : }
     755              : 
     756              : size_t
     757            6 : SV_npos(SV_Str_view const sv) {
     758            6 :     return sv.len;
     759              : }
     760              : 
     761              : /* ======================   Static Helpers    ============================= */
     762              : 
     763              : static size_t
     764          194 : after_find(SV_Str_view const haystack, SV_Str_view const needle) {
     765          194 :     if (needle.len > haystack.len) {
     766           11 :         return 0;
     767              :     }
     768          183 :     size_t delim_i = 0;
     769          183 :     size_t i = 0;
     770          329 :     while (i < haystack.len && needle.str[delim_i] == haystack.str[i]) {
     771          146 :         delim_i = (delim_i + 1) % needle.len;
     772          146 :         ++i;
     773              :     }
     774              :     /* Also reset to the last mismatch found. If some of the delimeter matched
     775              :        but then the string changed into a mismatch go back to get characters
     776              :        that are partially in the delimeter. */
     777          183 :     return i - delim_i;
     778          194 : }
     779              : 
     780              : static size_t
     781          180 : before_reverse_find(SV_Str_view const haystack, SV_Str_view const needle) {
     782          180 :     if (needle.len > haystack.len || !needle.len || !haystack.len) {
     783            9 :         return haystack.len;
     784              :     }
     785          171 :     size_t delim_i = 0;
     786          171 :     size_t i = 0;
     787          454 :     while (i < haystack.len
     788          283 :            && needle.str[needle.len - delim_i - 1]
     789          283 :                   == haystack.str[haystack.len - i - 1]) {
     790          112 :         delim_i = (delim_i + 1) % needle.len;
     791          112 :         ++i;
     792              :     }
     793              :     /* Ugly logic to account for the reverse nature of this modulo search.
     794              :        the position needs to account for any part of the delim that may
     795              :        have started to match but then mismatched. The 1 is because
     796              :        this in an index being returned not a length. */
     797          171 :     return i == haystack.len ? haystack.len : haystack.len - i + delim_i - 1;
     798          180 : }
     799              : 
     800              : static inline size_t
     801          284 : min(size_t const a, size_t const b) {
     802          284 :     return a < b ? a : b;
     803              : }
     804              : 
     805              : static inline ptrdiff_t
     806          150 : signed_max(ptrdiff_t const a, ptrdiff_t const b) {
     807          150 :     return a > b ? a : b;
     808              : }
     809              : 
     810              : static inline SV_Order
     811         2162 : char_compare(char const a, char const b) {
     812         2162 :     return (a > b) - (a < b);
     813              : }
     814              : 
     815              : /* ======================   Static Utilities    =========================== */
     816              : 
     817              : /* This is section is modeled after the musl string.h library. However,
     818              :    using SV_Str_view that may not be null terminated requires modifications. */
     819              : 
     820              : static inline bool
     821          406 : bitset_set(size_t *const bitset, unsigned char const char_as_size_t) {
     822          812 :     return (bitset[char_as_size_t / (8 * sizeof(*bitset))]
     823          812 :             |= ((size_t)1 << (char_as_size_t % (8 * sizeof(*bitset)))))
     824          406 :         != 0;
     825              : }
     826              : 
     827              : static inline bool
     828          172 : bitset_test(size_t const *bitset, unsigned char const char_as_size_t) {
     829          344 :     return (bitset[char_as_size_t / (8 * sizeof(*bitset))]
     830          172 :             & ((size_t)1 << (char_as_size_t % (8 * sizeof(*bitset)))))
     831          172 :         != 0;
     832              : }
     833              : 
     834              : /* This is dangerous. Do not use this under normal circumstances.
     835              :    This is an internal helper for the backwards two way string
     836              :    searching algorithm. It expects that both arguments are
     837              :    greater than or equal to n bytes in length similar to how
     838              :    the forward version expects the same. However, the comparison
     839              :    moves backward from the location provided for n bytes. */
     840              : static int
     841           39 : reverse_memcmp(void const *const vl, void const *const vr, size_t n) {
     842           39 :     unsigned char const *l = vl;
     843           39 :     unsigned char const *r = vr;
     844           41 :     for (; n && *l == *r; n--, l--, r--) {}
     845           39 :     return n ? *l - *r : 0;
     846           39 : }
     847              : 
     848              : /* strcspn is based on musl C-standard library implementation
     849              :    http://git.musl-libc.org/cgit/musl/tree/src/string/strcspn.c
     850              :    A custom implementation is necessary because C standard library impls
     851              :    have no concept of a string view and will continue searching beyond the
     852              :    end of a view until null is found. This way, string searches are
     853              :    efficient and only within the range specified. */
     854              : static size_t
     855           18 : view_span_in_set_complement_length(size_t const str_size,
     856           18 :                                    char const ARR_CONST_GEQ(str, str_size),
     857              :                                    size_t const set_size,
     858           18 :                                    char const ARR_GEQ(set, set_size)) {
     859           18 :     if (!set_size) {
     860            0 :         return str_size;
     861              :     }
     862           18 :     char const *a = str;
     863           18 :     size_t byteset[32 / sizeof(size_t)] = {0};
     864           18 :     if (set_size == 1) {
     865           54 :         for (size_t i = 0; i < str_size && *a != *set; ++a, ++i) {}
     866           13 :         return (size_t)(a - str);
     867              :     }
     868           30 :     for (size_t i = 0; i < set_size && bitset_set(byteset, *set); ++set, ++i) {}
     869           17 :     for (size_t i = 0; i < str_size && !bitset_test(byteset, *a); ++a, ++i) {}
     870            5 :     return (size_t)(a - str);
     871           18 : }
     872              : 
     873              : /* strspn is based on musl C-standard library implementation
     874              :    https://git.musl-libc.org/cgit/musl/tree/src/string/strspn.c
     875              :    A custom implemenatation is necessary because C standard library impls
     876              :    have no concept of a string view and will continue searching beyond the
     877              :    end of a view until null is found. This way, string searches are
     878              :    efficient and only within the range specified. */
     879              : static size_t
     880          313 : view_span_in_set_length(size_t const str_size,
     881          313 :                         char const ARR_CONST_GEQ(str, str_size),
     882              :                         size_t const set_size,
     883          313 :                         char const ARR_GEQ(set, set_size)) {
     884          313 :     char const *a = str;
     885          313 :     size_t byteset[32 / sizeof(size_t)] = {0};
     886          313 :     if (!set_size) {
     887            0 :         return str_size;
     888              :     }
     889          313 :     if (set_size == 1) {
     890          303 :         for (size_t i = 0; i < str_size && *a == *set; ++a, ++i) {}
     891          237 :         return (size_t)(a - str);
     892              :     }
     893          914 :     for (size_t i = 0;
     894          457 :          i < set_size && bitset_set(byteset, *(unsigned char *)set);
     895          381 :          ++set, ++i) {}
     896          326 :     for (size_t i = 0;
     897          163 :          i < str_size && bitset_test(byteset, *(unsigned char *)a); ++a, ++i) {}
     898           76 :     return (size_t)(a - str);
     899          313 : }
     900              : 
     901              : /* Providing strnstrn rather than strstr at the lowest level works better
     902              :    for string views where the string may not be null terminated. There needs
     903              :    to always be the additional constraint that a search cannot exceed the
     904              :    haystack length. Returns 0 based index position at which needle begins in
     905              :    haystack if it can be found, otherwise the haystack size is returned. */
     906              : static size_t
     907          220 : view_match(ptrdiff_t const haystack_size,
     908          220 :            char const ARR_CONST_GEQ(haystack, haystack_size),
     909              :            ptrdiff_t const needle_size,
     910          220 :            char const ARR_CONST_GEQ(needle, needle_size)) {
     911          220 :     if (!haystack_size || !needle_size || needle_size > haystack_size) {
     912            9 :         return (size_t)haystack_size;
     913              :     }
     914          211 :     if (1 == needle_size) {
     915          111 :         return view_match_char((size_t)haystack_size, haystack, *needle);
     916              :     }
     917          100 :     if (2 == needle_size) {
     918           40 :         return two_byte_view_match((size_t)haystack_size,
     919           20 :                                    (unsigned char *)haystack, 2,
     920           20 :                                    (unsigned char *)needle);
     921              :     }
     922           80 :     if (3 == needle_size) {
     923           48 :         return three_byte_view_match((size_t)haystack_size,
     924           24 :                                      (unsigned char *)haystack, 3,
     925           24 :                                      (unsigned char *)needle);
     926              :     }
     927           56 :     if (4 == needle_size) {
     928           20 :         return four_byte_view_match((size_t)haystack_size,
     929           10 :                                     (unsigned char *)haystack, 4,
     930           10 :                                     (unsigned char *)needle);
     931              :     }
     932           46 :     return two_way_match(haystack_size, haystack, needle_size, needle);
     933          220 : }
     934              : 
     935              : /* For now reverse logic for backwards searches has been separated into
     936              :    other functions. There is a possible formula to unit the reverse and
     937              :    forward logic into one set of functions, but the code is ugly. See
     938              :    the start of the reverse two-way algorithm for more. May unite if
     939              :    a clean way exists. */
     940              : static size_t
     941          226 : reverse_view_match(ptrdiff_t const haystack_size,
     942          226 :                    char const ARR_CONST_GEQ(haystack, haystack_size),
     943              :                    ptrdiff_t const needle_size,
     944          226 :                    char const ARR_CONST_GEQ(needle, needle_size)) {
     945          226 :     if (!haystack_size || !needle_size || needle_size > haystack_size) {
     946            4 :         return (size_t)haystack_size;
     947              :     }
     948          222 :     if (1 == needle_size) {
     949          154 :         return reverse_view_match_char((size_t)haystack_size, haystack,
     950           77 :                                        *needle);
     951              :     }
     952          145 :     if (2 == needle_size) {
     953           84 :         return reverse_two_byte_view_match((size_t)haystack_size,
     954           42 :                                            (unsigned char *)haystack, 2,
     955           42 :                                            (unsigned char *)needle);
     956              :     }
     957          103 :     if (3 == needle_size) {
     958           78 :         return reverse_three_byte_view_match((size_t)haystack_size,
     959           39 :                                              (unsigned char *)haystack, 3,
     960           39 :                                              (unsigned char *)needle);
     961              :     }
     962           64 :     if (4 == needle_size) {
     963           50 :         return reverse_four_byte_view_match((size_t)haystack_size,
     964           25 :                                             (unsigned char *)haystack, 4,
     965           25 :                                             (unsigned char *)needle);
     966              :     }
     967           39 :     return two_way_reverse_match(haystack_size, haystack, needle_size, needle);
     968          226 : }
     969              : 
     970              : /*==============   Post-Precomputation Two-Way Search    =================*/
     971              : 
     972              : /* Definitions for Two-Way String-Matching taken from original authors:
     973              : 
     974              :    CROCHEMORE M., PERRIN D., 1991, Two-way string-matching,
     975              :    Journal of the ACM 38(3):651-675.
     976              : 
     977              :    This algorithm is used for its simplicity and low space requirement.
     978              :    What follows is a massively simplified approximation (due to my own
     979              :    lack of knowledge) of glibc's approach with the help of the ESMAJ
     980              :    website on string matching and the original paper in ACM.
     981              : 
     982              :    http://igm.univ-mlv.fr/~lecroq/string/node26.html#SECTION00260
     983              : 
     984              :    Variable names and descriptions attempt to communicate authors' original
     985              :    meaning from the 1991 paper. */
     986              : 
     987              : /* Two Way string matching algorithm adapted from ESMAJ
     988              :    http://igm.univ-mlv.fr/~lecroq/string/node26.html#SECTION00260
     989              : 
     990              :    Assumes the needle size is shorter than haystack size. Sizes are needed
     991              :    for string view operations because strings may not be null terminated
     992              :    and the string view library is most likely search a view rather than
     993              :    an entire string. Returns the position at which needle begins if found
     994              :    and the size of the haystack stack if not found. */
     995              : static inline size_t
     996           46 : two_way_match(ptrdiff_t const haystack_size,
     997           46 :               char const ARR_CONST_GEQ(haystack, haystack_size),
     998              :               ptrdiff_t const needle_size,
     999           46 :               char const ARR_CONST_GEQ(needle, needle_size)) {
    1000              :     /* Preprocessing to get critical position and period distance. */
    1001           46 :     struct Factorization const s = maximal_suffix(needle_size, needle);
    1002           46 :     struct Factorization const r = maximal_suffix_reverse(needle_size, needle);
    1003           46 :     struct Factorization const w
    1004           46 :         = (s.critical_position > r.critical_position) ? s : r;
    1005              :     /* Determine if memoization is available due to found border/overlap. */
    1006           92 :     if (!memcmp(needle, needle + w.period_distance,
    1007           46 :                 (size_t)w.critical_position + 1)) {
    1008           46 :         return position_memoized(haystack_size, haystack, needle_size, needle,
    1009           23 :                                  w.period_distance, w.critical_position);
    1010              :     }
    1011           46 :     return position_normal(haystack_size, haystack, needle_size, needle,
    1012           23 :                            w.period_distance, w.critical_position);
    1013           46 : }
    1014              : 
    1015              : /* Two Way string matching algorithm adapted from ESMAJ
    1016              :    http://igm.univ-mlv.fr/~lecroq/string/node26.html#SECTION00260 */
    1017              : static size_t
    1018           23 : position_memoized(ptrdiff_t const haystack_size,
    1019           23 :                   char const ARR_CONST_GEQ(haystack, haystack_size),
    1020              :                   ptrdiff_t const needle_size,
    1021           23 :                   char const ARR_CONST_GEQ(needle, needle_size),
    1022              :                   ptrdiff_t const period_dist, ptrdiff_t const critical_pos) {
    1023           23 :     ptrdiff_t lpos = 0;
    1024           23 :     ptrdiff_t rpos = 0;
    1025              :     /* Eliminate worst case quadratic time complexity with memoization. */
    1026           23 :     ptrdiff_t memoize_shift = -1;
    1027           69 :     while (lpos <= haystack_size - needle_size) {
    1028           69 :         rpos = signed_max(critical_pos, memoize_shift) + 1;
    1029          195 :         while (rpos < needle_size && needle[rpos] == haystack[rpos + lpos]) {
    1030          126 :             ++rpos;
    1031              :         }
    1032           69 :         if (rpos < needle_size) {
    1033           46 :             lpos += (rpos - critical_pos);
    1034           46 :             memoize_shift = -1;
    1035           46 :             continue;
    1036              :         }
    1037              :         /* r_pos >= needle_size */
    1038           23 :         rpos = critical_pos;
    1039           25 :         while (rpos > memoize_shift && needle[rpos] == haystack[rpos + lpos]) {
    1040            2 :             --rpos;
    1041              :         }
    1042           23 :         if (rpos <= memoize_shift) {
    1043           23 :             return (size_t)lpos;
    1044              :         }
    1045            0 :         lpos += period_dist;
    1046              :         /* Some prefix of needle coincides with the text. Memoize the length
    1047              :            of this prefix to increase length of next shift, if possible. */
    1048            0 :         memoize_shift = needle_size - period_dist - 1;
    1049              :     }
    1050            0 :     return (size_t)haystack_size;
    1051           23 : }
    1052              : 
    1053              : /* Two Way string matching algorithm adapted from ESMAJ
    1054              :    http://igm.univ-mlv.fr/~lecroq/string/node26.html#SECTION00260 */
    1055              : static size_t
    1056           23 : position_normal(ptrdiff_t const haystack_size,
    1057           23 :                 char const ARR_CONST_GEQ(haystack, haystack_size),
    1058              :                 ptrdiff_t const needle_size,
    1059           23 :                 char const ARR_CONST_GEQ(needle, needle_size),
    1060              :                 ptrdiff_t period_dist, ptrdiff_t const critical_pos) {
    1061              :     period_dist
    1062           23 :         = signed_max(critical_pos + 1, needle_size - critical_pos - 1) + 1;
    1063           23 :     ptrdiff_t lpos = 0;
    1064           23 :     ptrdiff_t rpos = 0;
    1065         1454 :     while (lpos <= haystack_size - needle_size) {
    1066         1453 :         rpos = critical_pos + 1;
    1067         1622 :         while (rpos < needle_size && needle[rpos] == haystack[rpos + lpos]) {
    1068          169 :             ++rpos;
    1069              :         }
    1070         1453 :         if (rpos < needle_size) {
    1071         1414 :             lpos += (rpos - critical_pos);
    1072         1414 :             continue;
    1073              :         }
    1074              :         /* r_pos >= needle_size */
    1075           39 :         rpos = critical_pos;
    1076          422 :         while (rpos >= 0 && needle[rpos] == haystack[rpos + lpos]) {
    1077          383 :             --rpos;
    1078              :         }
    1079           39 :         if (rpos < 0) {
    1080           22 :             return (size_t)lpos;
    1081              :         }
    1082           17 :         lpos += period_dist;
    1083              :     }
    1084            1 :     return (size_t)haystack_size;
    1085           23 : }
    1086              : 
    1087              : /* ==============   Suffix and Critical Factorization    =================*/
    1088              : 
    1089              : /* Computing of the maximal suffix. Adapted from ESMAJ.
    1090              :    http://igm.univ-mlv.fr/~lecroq/string/node26.html#SECTION00260 */
    1091              : static inline struct Factorization
    1092           46 : maximal_suffix(ptrdiff_t const needle_size,
    1093           46 :                char const ARR_CONST_GEQ(needle, needle_size)) {
    1094           46 :     ptrdiff_t suff_pos = -1;
    1095           46 :     ptrdiff_t period = 1;
    1096           46 :     ptrdiff_t last_rest = 0;
    1097           46 :     ptrdiff_t rest = 1;
    1098          570 :     while (last_rest + rest < needle_size) {
    1099          524 :         switch (
    1100          524 :             char_compare(needle[last_rest + rest], needle[suff_pos + rest])) {
    1101              :             case SV_ORDER_LESSER:
    1102          391 :                 last_rest += rest;
    1103          391 :                 rest = 1;
    1104          391 :                 period = last_rest - suff_pos;
    1105          391 :                 break;
    1106              :             case SV_ORDER_EQUAL:
    1107          101 :                 if (rest != period) {
    1108           11 :                     ++rest;
    1109           11 :                 } else {
    1110           90 :                     last_rest += period;
    1111           90 :                     rest = 1;
    1112              :                 }
    1113          101 :                 break;
    1114              :             case SV_ORDER_GREATER:
    1115           32 :                 suff_pos = last_rest;
    1116           32 :                 last_rest = suff_pos + 1;
    1117           32 :                 rest = period = 1;
    1118           32 :                 break;
    1119              :             default:
    1120            0 :                 break;
    1121              :         }
    1122              :     }
    1123          138 :     return (struct Factorization){
    1124           46 :         .critical_position = suff_pos,
    1125           46 :         .period_distance = period,
    1126              :     };
    1127           46 : }
    1128              : 
    1129              : /* Computing of the maximal suffix reverse. Sometimes called tilde.
    1130              :    adapted from ESMAJ
    1131              :    http://igm.univ-mlv.fr/~lecroq/string/node26.html#SECTION00260 */
    1132              : static inline struct Factorization
    1133           46 : maximal_suffix_reverse(ptrdiff_t const needle_size,
    1134           46 :                        char const ARR_CONST_GEQ(needle, needle_size)) {
    1135           46 :     ptrdiff_t suff_pos = -1;
    1136           46 :     ptrdiff_t period = 1;
    1137           46 :     ptrdiff_t last_rest = 0;
    1138           46 :     ptrdiff_t rest = 1;
    1139          578 :     while (last_rest + rest < needle_size) {
    1140          532 :         switch (
    1141          532 :             char_compare(needle[last_rest + rest], needle[suff_pos + rest])) {
    1142              :             case SV_ORDER_GREATER:
    1143          330 :                 last_rest += rest;
    1144          330 :                 rest = 1;
    1145          330 :                 period = last_rest - suff_pos;
    1146          330 :                 break;
    1147              :             case SV_ORDER_EQUAL:
    1148          160 :                 if (rest != period) {
    1149           60 :                     ++rest;
    1150           60 :                 } else {
    1151          100 :                     last_rest += period;
    1152          100 :                     rest = 1;
    1153              :                 }
    1154          160 :                 break;
    1155              :             case SV_ORDER_LESSER:
    1156           42 :                 suff_pos = last_rest;
    1157           42 :                 last_rest = suff_pos + 1;
    1158           42 :                 rest = period = 1;
    1159           42 :                 break;
    1160              :             default:
    1161            0 :                 break;
    1162              :         }
    1163              :     }
    1164          138 :     return (struct Factorization){
    1165           46 :         .critical_position = suff_pos,
    1166           46 :         .period_distance = period,
    1167              :     };
    1168           46 : }
    1169              : 
    1170              : /*=======================  Right to Left Search  ===========================*/
    1171              : 
    1172              : /* Two way algorithm is easy to reverse. Instead of trying to reverse all
    1173              :    logic in the factorizations and two way searches, leave the algorithms
    1174              :    and calculate the values returned as offsets from the end of the string
    1175              :    instead of indices starting from 0. It would be even be possible to unite
    1176              :    these functions into one with the following formula
    1177              :    (using the suffix calculation as an example):
    1178              : 
    1179              :         ptrdiff_t suff_pos = -1;
    1180              :         ptrdiff_t period = 1;
    1181              :         ptrdiff_t last_rest = 0;
    1182              :         ptrdiff_t rest = 1;
    1183              :         ptrdiff_t negate_size = 0;
    1184              :         ptrdiff_t negate_one = 0;
    1185              :         if (direction == FORWARD)
    1186              :         {
    1187              :             negate_size = needle_size;
    1188              :             negate_one = 1;
    1189              :         }
    1190              :         while (last_rest + rest < needle_size)
    1191              :         {
    1192              :             switch (SV_char_cmp(
    1193              :                 needle[needle_size
    1194              :                         - (last_rest + rest) - 1 + negate_size + negate_one],
    1195              :                 needle[needle_size
    1196              :                         - (suff_pos + rest) - 1 + negate_size + negate_one]))
    1197              :             {
    1198              :             ...
    1199              : 
    1200              :    That would save the code repitition across all of the following
    1201              :    functions but probably would make the code even harder to read and
    1202              :    maintain. These algorithms are dense enough already so I think repetion
    1203              :    is fine. Leaving comment here if that changes or an even better way comes
    1204              :    along. */
    1205              : 
    1206              : /* Searches a string from right to left with a two-way algorithm. Returns
    1207              :    the position of the start of the strig if found and string size if not. */
    1208              : static inline size_t
    1209           39 : two_way_reverse_match(ptrdiff_t const haystack_size,
    1210           39 :                       char const ARR_CONST_GEQ(haystack, haystack_size),
    1211              :                       ptrdiff_t const needle_size,
    1212           39 :                       char const ARR_CONST_GEQ(needle, needle_size)) {
    1213           39 :     struct Factorization const s = reverse_maximal_suffix(needle_size, needle);
    1214           39 :     struct Factorization const r
    1215           39 :         = reverse_maximal_suffix_reverse(needle_size, needle);
    1216           39 :     struct Factorization const w
    1217           39 :         = (s.critical_position > r.critical_position) ? s : r;
    1218           78 :     if (!reverse_memcmp(needle + needle_size - 1,
    1219           39 :                         needle + needle_size - w.period_distance - 1,
    1220           39 :                         (size_t)w.critical_position + 1)) {
    1221           46 :         return reverse_position_memoized(haystack_size, haystack, needle_size,
    1222           23 :                                          needle, w.period_distance,
    1223           23 :                                          w.critical_position);
    1224              :     }
    1225           32 :     return reverse_position_normal(haystack_size, haystack, needle_size, needle,
    1226           16 :                                    w.period_distance, w.critical_position);
    1227           39 : }
    1228              : 
    1229              : static size_t
    1230           23 : reverse_position_memoized(ptrdiff_t const haystack_size,
    1231           23 :                           char const ARR_CONST_GEQ(haystack, haystack_size),
    1232              :                           ptrdiff_t const needle_size,
    1233           23 :                           char const ARR_CONST_GEQ(needle, needle_size),
    1234              :                           ptrdiff_t const period_dist,
    1235              :                           ptrdiff_t const critical_pos) {
    1236           23 :     ptrdiff_t lpos = 0;
    1237           23 :     ptrdiff_t rpos = 0;
    1238           23 :     ptrdiff_t memoize_shift = -1;
    1239           42 :     while (lpos <= haystack_size - needle_size) {
    1240           42 :         rpos = signed_max(critical_pos, memoize_shift) + 1;
    1241          208 :         while (rpos < needle_size
    1242          166 :                && needle[needle_size - rpos - 1]
    1243          143 :                       == haystack[haystack_size - (rpos + lpos) - 1]) {
    1244          124 :             ++rpos;
    1245              :         }
    1246           42 :         if (rpos < needle_size) {
    1247           19 :             lpos += (rpos - critical_pos);
    1248           19 :             memoize_shift = -1;
    1249           19 :             continue;
    1250              :         }
    1251              :         /* r_pos >= needle_size */
    1252           23 :         rpos = critical_pos;
    1253           27 :         while (rpos > memoize_shift
    1254           25 :                && needle[needle_size - rpos - 1]
    1255            2 :                       == haystack[haystack_size - (rpos + lpos) - 1]) {
    1256            2 :             --rpos;
    1257              :         }
    1258           23 :         if (rpos <= memoize_shift) {
    1259           23 :             return (size_t)(haystack_size - lpos - needle_size);
    1260              :         }
    1261            0 :         lpos += period_dist;
    1262              :         /* Some prefix of needle coincides with the text. Memoize the length
    1263              :            of this prefix to increase length of next shift, if possible. */
    1264            0 :         memoize_shift = needle_size - period_dist - 1;
    1265              :     }
    1266            0 :     return (size_t)haystack_size;
    1267           23 : }
    1268              : 
    1269              : static size_t
    1270           16 : reverse_position_normal(ptrdiff_t const haystack_size,
    1271           16 :                         char const ARR_CONST_GEQ(haystack, haystack_size),
    1272              :                         ptrdiff_t const needle_size,
    1273           16 :                         char const ARR_CONST_GEQ(needle, needle_size),
    1274              :                         ptrdiff_t period_dist, ptrdiff_t const critical_pos) {
    1275              :     period_dist
    1276           16 :         = signed_max(critical_pos + 1, needle_size - critical_pos - 1) + 1;
    1277           16 :     ptrdiff_t lpos = 0;
    1278           16 :     ptrdiff_t rpos = 0;
    1279          831 :     while (lpos <= haystack_size - needle_size) {
    1280          828 :         rpos = critical_pos + 1;
    1281         1975 :         while (rpos < needle_size
    1282         1147 :                && (needle[needle_size - rpos - 1]
    1283         1104 :                    == haystack[haystack_size - (rpos + lpos) - 1])) {
    1284          319 :             ++rpos;
    1285              :         }
    1286          828 :         if (rpos < needle_size) {
    1287          785 :             lpos += (rpos - critical_pos);
    1288          785 :             continue;
    1289              :         }
    1290              :         /* r_pos >= needle_size */
    1291           43 :         rpos = critical_pos;
    1292          294 :         while (rpos >= 0
    1293          251 :                && needle[needle_size - rpos - 1]
    1294          238 :                       == haystack[haystack_size - (rpos + lpos) - 1]) {
    1295          208 :             --rpos;
    1296              :         }
    1297           43 :         if (rpos < 0) {
    1298           13 :             return (size_t)(haystack_size - lpos - needle_size);
    1299              :         }
    1300           30 :         lpos += period_dist;
    1301              :     }
    1302            3 :     return (size_t)haystack_size;
    1303           16 : }
    1304              : 
    1305              : static inline struct Factorization
    1306           39 : reverse_maximal_suffix(ptrdiff_t const needle_size,
    1307           39 :                        char const ARR_CONST_GEQ(needle, needle_size)) {
    1308           39 :     ptrdiff_t suff_pos = -1;
    1309           39 :     ptrdiff_t period = 1;
    1310           39 :     ptrdiff_t last_rest = 0;
    1311           39 :     ptrdiff_t rest = 1;
    1312          590 :     while (last_rest + rest < needle_size) {
    1313         1102 :         switch (char_compare(needle[needle_size - (last_rest + rest) - 1],
    1314          551 :                              needle[needle_size - (suff_pos + rest) - 1])) {
    1315              :             case SV_ORDER_LESSER:
    1316          406 :                 last_rest += rest;
    1317          406 :                 rest = 1;
    1318          406 :                 period = last_rest - suff_pos;
    1319          406 :                 break;
    1320              :             case SV_ORDER_EQUAL:
    1321           95 :                 if (rest != period) {
    1322            5 :                     ++rest;
    1323            5 :                 } else {
    1324           90 :                     last_rest += period;
    1325           90 :                     rest = 1;
    1326              :                 }
    1327           95 :                 break;
    1328              :             case SV_ORDER_GREATER:
    1329           50 :                 suff_pos = last_rest;
    1330           50 :                 last_rest = suff_pos + 1;
    1331           50 :                 rest = period = 1;
    1332           50 :                 break;
    1333              :             default:
    1334            0 :                 break;
    1335              :         }
    1336              :     }
    1337          117 :     return (struct Factorization){
    1338           39 :         .critical_position = suff_pos,
    1339           39 :         .period_distance = period,
    1340              :     };
    1341           39 : }
    1342              : 
    1343              : static inline struct Factorization
    1344           39 : reverse_maximal_suffix_reverse(ptrdiff_t const needle_size,
    1345           39 :                                char const ARR_CONST_GEQ(needle, needle_size)) {
    1346           39 :     ptrdiff_t suff_pos = -1;
    1347           39 :     ptrdiff_t period = 1;
    1348           39 :     ptrdiff_t last_rest = 0;
    1349           39 :     ptrdiff_t rest = 1;
    1350          594 :     while (last_rest + rest < needle_size) {
    1351         1110 :         switch (char_compare(needle[needle_size - (last_rest + rest) - 1],
    1352          555 :                              needle[needle_size - (suff_pos + rest) - 1])) {
    1353              :             case SV_ORDER_GREATER:
    1354          379 :                 last_rest += rest;
    1355          379 :                 rest = 1;
    1356          379 :                 period = last_rest - suff_pos;
    1357          379 :                 break;
    1358              :             case SV_ORDER_EQUAL:
    1359          153 :                 if (rest != period) {
    1360           63 :                     ++rest;
    1361           63 :                 } else {
    1362           90 :                     last_rest += period;
    1363           90 :                     rest = 1;
    1364              :                 }
    1365          153 :                 break;
    1366              :             case SV_ORDER_LESSER:
    1367           23 :                 suff_pos = last_rest;
    1368           23 :                 last_rest = suff_pos + 1;
    1369           23 :                 rest = period = 1;
    1370           23 :                 break;
    1371              :             default:
    1372            0 :                 break;
    1373              :         }
    1374              :     }
    1375          117 :     return (struct Factorization){
    1376           39 :         .critical_position = suff_pos,
    1377           39 :         .period_distance = period,
    1378              :     };
    1379           39 : }
    1380              : 
    1381              : /* ======================   Brute Force Search    ==========================
    1382              :  */
    1383              : 
    1384              : /* All brute force searches adapted from musl C library.
    1385              :    http://git.musl-libc.org/cgit/musl/tree/src/string/strstr.c
    1386              :    They must stop the search at haystack size and therefore required slight
    1387              :    modification because string views may not be null terminated. Reverse
    1388              :    methods are my own additions provided to support a compliant reverse
    1389              :    search for rfind which most string libraries specify must search right
    1390              :    to left. Also having a reverse tokenizer is convenient and also relies
    1391              :    on right to left brute force searches. */
    1392              : static inline size_t
    1393          111 : view_match_char(size_t n, char const ARR_GEQ(s, n), char const c) {
    1394          111 :     size_t i = 0;
    1395          772 :     for (; n && *s != c; s++, --n, ++i) {}
    1396          222 :     return i;
    1397          111 : }
    1398              : 
    1399              : static inline size_t
    1400           77 : reverse_view_match_char(size_t const n, char const ARR_CONST_GEQ(s, n),
    1401              :                         char const c) {
    1402           77 :     char const *x = s + n - 1;
    1403           77 :     size_t i = n;
    1404          639 :     for (; i && *x != c; x--, --i) {}
    1405           77 :     return i ? i - 1 : n;
    1406           77 : }
    1407              : 
    1408              : static inline size_t
    1409           20 : two_byte_view_match(size_t const size, unsigned char const ARR_GEQ(h, size),
    1410              :                     size_t const n_size,
    1411           20 :                     unsigned char const ARR_CONST_GEQ(n, n_size)) {
    1412           20 :     unsigned char const *const end = h + size;
    1413           20 :     uint16_t nw = (uint16_t)(n[0] << 8 | n[1]);
    1414           20 :     uint16_t hw = (uint16_t)(h[0] << 8 | h[1]);
    1415           87 :     for (++h; hw != nw && ++h < end; hw = (uint16_t)(hw << 8) | *h) {}
    1416           20 :     return h >= end ? size : (size - (size_t)(end - h)) - 1;
    1417           20 : }
    1418              : 
    1419              : static inline size_t
    1420           42 : reverse_two_byte_view_match(size_t const size,
    1421           42 :                             unsigned char const ARR_CONST_GEQ(h, size),
    1422              :                             size_t const n_size,
    1423           42 :                             unsigned char const ARR_CONST_GEQ(n, n_size)) {
    1424           42 :     unsigned char const *i = h + (size - 2);
    1425           42 :     uint16_t nw = (uint16_t)(n[0] << 8 | n[1]);
    1426           42 :     uint16_t iw = (uint16_t)(i[0] << 8 | i[1]);
    1427              :     /* The search is right to left therefore the Most Significant Byte will
    1428              :        be the leading character of the string and the previous leading
    1429              :        character is shifted to the right. */
    1430          224 :     for (; iw != nw && --i >= h; iw = (uint16_t)((iw >> 8) | (*i << 8))) {}
    1431           42 :     return i < h ? size : (size_t)(i - h);
    1432           42 : }
    1433              : 
    1434              : static inline size_t
    1435           24 : three_byte_view_match(size_t const size, unsigned char const ARR_GEQ(h, size),
    1436              :                       size_t const n_size,
    1437           24 :                       unsigned char const ARR_CONST_GEQ(n, n_size)) {
    1438           24 :     unsigned char const *const end = h + size;
    1439           24 :     uint32_t nw = (uint32_t)(n[0] << 24 | n[1] << 16 | n[2] << 8);
    1440           24 :     uint32_t hw = (uint32_t)(h[0] << 24 | h[1] << 16 | h[2] << 8);
    1441          111 :     for (h += 2; hw != nw && ++h < end; hw = (hw | *h) << 8) {}
    1442           24 :     return h >= end ? size : (size - (size_t)(end - h)) - 2;
    1443           24 : }
    1444              : 
    1445              : static inline size_t
    1446           39 : reverse_three_byte_view_match(size_t const size,
    1447           39 :                               unsigned char const ARR_CONST_GEQ(h, size),
    1448              :                               size_t const n_size,
    1449           39 :                               unsigned char const ARR_CONST_GEQ(n, n_size)) {
    1450           39 :     unsigned char const *i = h + (size - 3);
    1451           39 :     uint32_t nw = (uint32_t)(n[0] << 16 | n[1] << 8 | n[2]);
    1452           39 :     uint32_t iw = (uint32_t)(i[0] << 16 | i[1] << 8 | i[2]);
    1453              :     /* Align the bits with fewer left shifts such that as the parsing
    1454              :        progresses right left, the leading character always takes highest
    1455              :        bit position and there is no need for any masking. */
    1456          249 :     for (; iw != nw && --i >= h; iw = (iw >> 8) | (uint32_t)(*i << 16)) {}
    1457           39 :     return i < h ? size : (size_t)(i - h);
    1458           39 : }
    1459              : 
    1460              : static inline size_t
    1461           10 : four_byte_view_match(size_t const size, unsigned char const ARR_GEQ(h, size),
    1462              :                      size_t const n_size,
    1463           10 :                      unsigned char const ARR_CONST_GEQ(n, n_size)) {
    1464           10 :     unsigned char const *const end = h + size;
    1465           10 :     uint32_t nw = (uint32_t)(n[0] << 24 | n[1] << 16 | n[2] << 8 | n[3]);
    1466           10 :     uint32_t hw = (uint32_t)(h[0] << 24 | h[1] << 16 | h[2] << 8 | h[3]);
    1467           72 :     for (h += 3; hw != nw && ++h < end; hw = (hw << 8) | *h) {}
    1468           10 :     return h >= end ? size : (size - (size_t)(end - h)) - 3;
    1469           10 : }
    1470              : 
    1471              : static inline size_t
    1472           25 : reverse_four_byte_view_match(size_t const size,
    1473           25 :                              unsigned char const ARR_CONST_GEQ(h, size),
    1474              :                              size_t const n_size,
    1475           25 :                              unsigned char const ARR_CONST_GEQ(n, n_size)) {
    1476           25 :     unsigned char const *i = h + (size - 4);
    1477           25 :     uint32_t nw = (uint32_t)(n[0] << 24 | n[1] << 16 | n[2] << 8 | n[3]);
    1478           25 :     uint32_t iw = (uint32_t)(i[0] << 24 | i[1] << 16 | i[2] << 8 | i[3]);
    1479              :     /* Now that all four bytes of the unsigned int are used the shifting
    1480              :        becomes more intuitive. The window slides left to right and the
    1481              :        next leading character takes the high bit position. */
    1482          226 :     for (; iw != nw && --i >= h; iw = (iw >> 8) | (uint32_t)(*i << 24)) {}
    1483           25 :     return i < h ? size : (size_t)(i - h);
    1484           25 : }
        

Generated by: LCOV version 2.4.1-beta