LCOV - code coverage report
Current view: top level - source/flat_bitset.c (source / functions) Coverage Total Hit
Test: CCC Test Suite Coverage Report Lines: 99.8 % 851 849
Test Date: 2026-05-12 15:05:06 Functions: 100.0 % 80 80

            Line data    Source code
       1              : /** Copyright 2025 Alexander G. Lopez
       2              : 
       3              : Licensed under the Apache License, Version 2.0 (the "License");
       4              : you may not use this file except in compliance with the License.
       5              : You may obtain a copy of the License at
       6              : 
       7              :    http://www.apache.org/licenses/LICENSE-2.0
       8              : 
       9              : Unless required by applicable law or agreed to in writing, software
      10              : distributed under the License is distributed on an "AS IS" BASIS,
      11              : WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
      12              : See the License for the specific language governing permissions and
      13              : limitations under the License.
      14              : 
      15              : This file implements a Bit Set using blocks of platform defined integers
      16              : for speed and efficiency. The implementation aims for constant and linear time
      17              : operations at all times, specifically when implementing more complicated range
      18              : based operations over the set.
      19              : 
      20              : It is also important to avoid modulo and division operations whenever possible.
      21              : This is why much of the code revolves around obtaining indices by processing
      22              : entire blocks at a time, rather than using mathematical operations to
      23              : conceptually iterate over individual bits.
      24              : 
      25              : Finally, the code is able to unite most functions for finding zeros or ones
      26              : into a single function that accepts true or false as an additional argument.
      27              : This is because every search for a zero can be solved by bitwise inverting a
      28              : block and searching for a 1 instead. This elimination of identical functions
      29              : costs a single branch in the function and is worth it to avoid code duplication
      30              : and bug doubling. */
      31              : /** C23 provided headers. */
      32              : #include <assert.h>
      33              : #include <limits.h>
      34              : #include <stddef.h>
      35              : #include <stdint.h>
      36              : 
      37              : /** CCC provided headers. */
      38              : #include "ccc/configuration.h"
      39              : #include "ccc/flat_bitset.h"
      40              : #include "ccc/private/private_flat_bitset.h"
      41              : #include "ccc/types.h"
      42              : 
      43              : /*=========================   Type Declarations  ============================*/
      44              : 
      45              : /** @internal The type representing the word size used for each block of the
      46              : Flat_bitset. This type may vary depending on the platform in order to maximize
      47              : performance, so it is given a type. The implementation then should not concern
      48              : itself with the word size.
      49              : 
      50              : The implementation may assume that the block is a standard integer width on
      51              : the given platform that is compatible with all basic arithmetic and bitwise
      52              : operations. If SIMD is implemented then multiple blocks may be processed under
      53              : this same assumption. */
      54              : typedef typeof(*(struct CCC_Flat_bitset){}.blocks) Bit_block;
      55              : 
      56              : /** @internal Used frequently so call the builtin just once. */
      57              : enum : size_t {
      58              :     /** @internal Bytes of a bit block to help with byte calculations. */
      59              :     SIZEOF_BLOCK = sizeof(Bit_block),
      60              : };
      61              : 
      62              : /** @internal Various constants to support bit block size bit ops. */
      63              : enum : Bit_block {
      64              :     /** @internal A mask of a bit block with all bits on. */
      65              :     BLOCK_ON = ((Bit_block)~0),
      66              :     /** @internal The Most Significant Bit of a bit block turned on to 1. */
      67              :     BLOCK_MSB = (((Bit_block)1) << (((SIZEOF_BLOCK * CHAR_BIT)) - 1)),
      68              : };
      69              : 
      70              : /** @internal An index into the block array or count of bit blocks. The block
      71              : array is constructed by the number of blocks required to support the current bit
      72              : set capacity. Assume this index type has range [0, block count to support N
      73              : bits].
      74              : 
      75              : User input is given as a `size_t` so distinguish from that input with this type
      76              : to make it clear to the reader the index refers to a block not the given bit
      77              : index the user has provided. */
      78              : typedef size_t Block_count;
      79              : 
      80              : /** @internal An index within a block. A block is set to some number of bits
      81              : as determined by the type used for each block. This type is intended to count
      82              : bits in a block and therefore cannot count up to arbitrary indices. Assume its
      83              : range is `[0, BIT_BLOCK_BITS]`, for ease of use and clarity.
      84              : 
      85              : There are many types of indexing and counting that take place in a bit set so
      86              : use this type specifically for counting bits in a block so the reader is clear
      87              : on intent. */
      88              : typedef uint8_t Bit_count;
      89              : 
      90              : enum : Bit_count {
      91              :     /** @internal How many total bits that fit in a bit block. */
      92              :     BLOCK_BITS = (SIZEOF_BLOCK * CHAR_BIT),
      93              :     /** @internal Used for static assert clarity. */
      94              :     U8_BLOCK_MAX = UINT8_MAX,
      95              :     /** @internal Hand coded log2 of block bits to avoid division. */
      96              :     BLOCK_BITS_LOG2 = 5,
      97              : };
      98              : static_assert(
      99              :     (BLOCK_BITS & (BLOCK_BITS - 1)) == 0,
     100              :     "the number of bits in a block is always a power of two, "
     101              :     "helping avoid division and modulo operations when possible"
     102              : );
     103              : static_assert(
     104              :     BLOCK_BITS >> BLOCK_BITS_LOG2 == 1,
     105              :     "hand coded log2 of bitblock bits is always correct"
     106              : );
     107              : static_assert(
     108              :     (Bit_count) ~((Bit_count)0) >= (Bit_count)0, "Bit_count must be unsigned"
     109              : );
     110              : static_assert(UINT8_MAX >= BLOCK_BITS, "Bit_count counts all block bits.");
     111              : 
     112              : /*=========================      Prototypes      ============================*/
     113              : 
     114              : static size_t block_count_index(size_t);
     115              : static Bit_block *block_at(struct CCC_Flat_bitset const *, size_t);
     116              : static void set(Bit_block *, size_t, CCC_Tribool);
     117              : static Bit_block on(size_t);
     118              : static void fix_end(struct CCC_Flat_bitset *);
     119              : static CCC_Tribool status(Bit_block const *, size_t);
     120              : static size_t block_count(size_t);
     121              : static CCC_Tribool
     122              : any_or_none_range(struct CCC_Flat_bitset const *, size_t, size_t, CCC_Tribool);
     123              : static CCC_Tribool all_range(struct CCC_Flat_bitset const *, size_t, size_t);
     124              : static CCC_Count first_trailing_bit_range(
     125              :     struct CCC_Flat_bitset const *, size_t, size_t, CCC_Tribool
     126              : );
     127              : static CCC_Count first_leading_bit_range(
     128              :     struct CCC_Flat_bitset const *, size_t, size_t, CCC_Tribool
     129              : );
     130              : static CCC_Count first_trailing_bits_range(
     131              :     struct CCC_Flat_bitset const *, size_t, size_t, size_t, CCC_Tribool
     132              : );
     133              : static CCC_Count first_leading_bits_range(
     134              :     struct CCC_Flat_bitset const *, size_t, size_t, size_t, CCC_Tribool
     135              : );
     136              : static CCC_Result
     137              : maybe_resize(struct CCC_Flat_bitset *, size_t, CCC_Allocator const *);
     138              : static size_t size_t_min(size_t, size_t);
     139              : static CCC_Tribool is_mask_match(Bit_block, Bit_block);
     140              : static Bit_block trailing_ones_mask(Bit_count);
     141              : static Bit_block leading_ones_mask(Bit_count);
     142              : static void set_all(struct CCC_Flat_bitset *, CCC_Tribool);
     143              : static Bit_count bit_count_index(size_t);
     144              : static CCC_Tribool
     145              : is_subset_of(struct CCC_Flat_bitset const *, struct CCC_Flat_bitset const *);
     146              : static Bit_count popcount(Bit_block);
     147              : static Bit_count count_trailing_zeros(Bit_block);
     148              : static Bit_count count_leading_zeros(Bit_block);
     149              : 
     150              : /*=======================   Public Interface   ==============================*/
     151              : 
     152              : CCC_Tribool
     153            4 : CCC_flat_bitset_is_proper_subset(
     154              :     CCC_Flat_bitset const *const subset, CCC_Flat_bitset const *const set
     155              : ) {
     156            4 :     if (!set || !subset) {
     157            2 :         return CCC_TRIBOOL_ERROR;
     158              :     }
     159            2 :     if (set->count <= subset->count) {
     160            1 :         return CCC_FALSE;
     161              :     }
     162            1 :     return is_subset_of(subset, set);
     163            4 : }
     164              : 
     165              : CCC_Tribool
     166           11 : CCC_flat_bitset_is_subset(
     167              :     CCC_Flat_bitset const *const subset, CCC_Flat_bitset const *const set
     168              : ) {
     169           11 :     if (!set || !subset) {
     170            2 :         return CCC_TRIBOOL_ERROR;
     171              :     }
     172            9 :     if (set->count < subset->count) {
     173            1 :         return CCC_FALSE;
     174              :     }
     175            8 :     return is_subset_of(subset, set);
     176           11 : }
     177              : 
     178              : CCC_Result
     179            8 : CCC_flat_bitset_or(
     180              :     CCC_Flat_bitset *const destination, CCC_Flat_bitset const *const source
     181              : ) {
     182            8 :     if (!destination || !source) {
     183            2 :         return CCC_RESULT_ARGUMENT_ERROR;
     184              :     }
     185            6 :     if (!destination->count || !source->count) {
     186            4 :         return CCC_RESULT_OK;
     187              :     }
     188            4 :     Block_count const end_block
     189            2 :         = block_count(size_t_min(destination->count, source->count));
     190           26 :     for (size_t b = 0; b < end_block; ++b) {
     191           24 :         destination->blocks[b] |= source->blocks[b];
     192           24 :     }
     193            2 :     fix_end(destination);
     194            2 :     return CCC_RESULT_OK;
     195            8 : }
     196              : 
     197              : CCC_Result
     198            6 : CCC_flat_bitset_xor(
     199              :     CCC_Flat_bitset *const destination, CCC_Flat_bitset const *const source
     200              : ) {
     201            6 :     if (!destination || !source) {
     202            2 :         return CCC_RESULT_ARGUMENT_ERROR;
     203              :     }
     204            4 :     if (!destination->count || !source->count) {
     205            2 :         return CCC_RESULT_OK;
     206              :     }
     207            4 :     Block_count const end_block
     208            2 :         = block_count(size_t_min(destination->count, source->count));
     209           26 :     for (Block_count b = 0; b < end_block; ++b) {
     210           24 :         destination->blocks[b] ^= source->blocks[b];
     211           24 :     }
     212            2 :     fix_end(destination);
     213            2 :     return CCC_RESULT_OK;
     214            6 : }
     215              : 
     216              : CCC_Result
     217            6 : CCC_flat_bitset_and(
     218              :     CCC_Flat_bitset *destination, CCC_Flat_bitset const *source
     219              : ) {
     220            6 :     if (!destination || !source) {
     221            2 :         return CCC_RESULT_ARGUMENT_ERROR;
     222              :     }
     223            4 :     if (!source->count) {
     224            1 :         set_all(destination, CCC_FALSE);
     225            1 :         return CCC_RESULT_OK;
     226              :     }
     227            3 :     if (!destination->count) {
     228            1 :         return CCC_RESULT_OK;
     229              :     }
     230            4 :     Block_count const end_block
     231            2 :         = block_count(size_t_min(destination->count, source->count));
     232           26 :     for (Block_count b = 0; b < end_block; ++b) {
     233           24 :         destination->blocks[b] &= source->blocks[b];
     234           24 :     }
     235            2 :     if (destination->count <= source->count) {
     236            1 :         return CCC_RESULT_OK;
     237              :     }
     238              :     /* The source widens to align with destination as integers would; same
     239              :        consequences. */
     240            1 :     Block_count const destination_blocks = block_count(destination->count);
     241            1 :     Block_count const remain = destination_blocks - end_block;
     242            2 :     (void)memset(
     243            2 :         destination->blocks + end_block, CCC_FALSE, remain * SIZEOF_BLOCK
     244              :     );
     245            1 :     fix_end(destination);
     246            1 :     return CCC_RESULT_OK;
     247            6 : }
     248              : 
     249              : CCC_Result
     250            9 : CCC_flat_bitset_shift_left(
     251              :     CCC_Flat_bitset *const bitset, size_t const left_shifts
     252              : ) {
     253            9 :     if (!bitset) {
     254            1 :         return CCC_RESULT_ARGUMENT_ERROR;
     255              :     }
     256            8 :     if (!bitset->count || !left_shifts) {
     257            1 :         return CCC_RESULT_OK;
     258              :     }
     259            7 :     if (left_shifts >= bitset->count) {
     260            1 :         set_all(bitset, CCC_FALSE);
     261            1 :         return CCC_RESULT_OK;
     262              :     }
     263            6 :     Block_count const end = block_count_index(bitset->count - 1);
     264            6 :     Block_count const blocks = block_count_index(left_shifts);
     265            6 :     Bit_count const split = bit_count_index(left_shifts);
     266            6 :     if (!split) {
     267           31 :         for (Block_count shift = end - blocks + 1, write = end; shift--;
     268           29 :              --write) {
     269           29 :             bitset->blocks[write] = bitset->blocks[shift];
     270           29 :         }
     271            2 :     } else {
     272            4 :         Bit_count const remain = BLOCK_BITS - split;
     273           32 :         for (Block_count shift = end - blocks, write = end; shift > 0;
     274           28 :              --shift, --write) {
     275           56 :             bitset->blocks[write] = (bitset->blocks[shift] << split)
     276           28 :                                   | (bitset->blocks[shift - 1] >> remain);
     277           28 :         }
     278            4 :         bitset->blocks[blocks] = bitset->blocks[0] << split;
     279            4 :     }
     280              :     /* Zero fills in lower bits just as an integer shift would. */
     281           26 :     for (Block_count i = 0; i < blocks; ++i) {
     282           20 :         bitset->blocks[i] = 0;
     283           20 :     }
     284            6 :     fix_end(bitset);
     285            6 :     return CCC_RESULT_OK;
     286            9 : }
     287              : 
     288              : CCC_Result
     289            9 : CCC_flat_bitset_shift_right(
     290              :     CCC_Flat_bitset *const bitset, size_t const right_shifts
     291              : ) {
     292            9 :     if (!bitset) {
     293            1 :         return CCC_RESULT_ARGUMENT_ERROR;
     294              :     }
     295            8 :     if (!bitset->count || !right_shifts) {
     296            1 :         return CCC_RESULT_OK;
     297              :     }
     298            7 :     if (right_shifts >= bitset->count) {
     299            1 :         set_all(bitset, CCC_FALSE);
     300            1 :         return CCC_RESULT_OK;
     301              :     }
     302            6 :     Block_count const end = block_count_index(bitset->count - 1);
     303            6 :     Block_count const blocks = block_count_index(right_shifts);
     304            6 :     Bit_count const split = bit_count_index(right_shifts);
     305            6 :     if (!split) {
     306           31 :         for (Block_count shift = blocks, write = 0; shift < end + 1;
     307           29 :              ++shift, ++write) {
     308           29 :             bitset->blocks[write] = bitset->blocks[shift];
     309           29 :         }
     310            2 :     } else {
     311            4 :         Bit_count const remain = BLOCK_BITS - split;
     312           32 :         for (Block_count shift = blocks, write = 0; shift < end;
     313           28 :              ++shift, ++write) {
     314           56 :             bitset->blocks[write] = (bitset->blocks[shift + 1] << remain)
     315           28 :                                   | (bitset->blocks[shift] >> split);
     316           28 :         }
     317            4 :         bitset->blocks[end - blocks] = bitset->blocks[end] >> split;
     318            4 :     }
     319              :     /* This is safe for a few reasons:
     320              :        - If shifts equals count we set all to 0 and returned early.
     321              :        - If we only have one block i will be equal to end and we are done.
     322              :        - If end is the 0th block we will stop after 1. A meaningful shift
     323              :          occurred in the 0th block so zeroing would be a mistake.
     324              :        - All other cases ensure it is safe to decrease i (no underflow).
     325              :        This operation emulates the zeroing of high bits on a right shift and
     326              :        a bit set is considered unsigned so we don't sign bit fill. */
     327           26 :     for (Block_count i = end; i > end - blocks; --i) {
     328           20 :         bitset->blocks[i] = 0;
     329           20 :     }
     330            6 :     fix_end(bitset);
     331            6 :     return CCC_RESULT_OK;
     332            9 : }
     333              : 
     334              : CCC_Tribool
     335         4144 : CCC_flat_bitset_test(CCC_Flat_bitset const *const bitset, size_t const i) {
     336         4144 :     if (!bitset) {
     337            1 :         return CCC_TRIBOOL_ERROR;
     338              :     }
     339         4143 :     if (i >= bitset->count) {
     340            7 :         return CCC_TRIBOOL_ERROR;
     341              :     }
     342         4136 :     return status(block_at(bitset, i), i);
     343         4144 : }
     344              : 
     345              : CCC_Tribool
     346        13721 : CCC_flat_bitset_set(
     347              :     CCC_Flat_bitset *const bitset, size_t const i, CCC_Tribool const b
     348              : ) {
     349        13721 :     if (!bitset) {
     350            1 :         return CCC_TRIBOOL_ERROR;
     351              :     }
     352        13720 :     if (i >= bitset->count) {
     353            2 :         return CCC_TRIBOOL_ERROR;
     354              :     }
     355        13718 :     Bit_block *const block = block_at(bitset, i);
     356        13718 :     CCC_Tribool const was = status(block, i);
     357        13718 :     set(block, i, b);
     358        13718 :     return was;
     359        13721 : }
     360              : 
     361              : CCC_Result
     362           34 : CCC_flat_bitset_set_all(CCC_Flat_bitset *const bitset, CCC_Tribool const b) {
     363           34 :     if (!bitset) {
     364            1 :         return CCC_RESULT_ARGUMENT_ERROR;
     365              :     }
     366           33 :     if (bitset->count) {
     367           33 :         set_all(bitset, b);
     368           33 :     }
     369           33 :     return CCC_RESULT_OK;
     370           34 : }
     371              : 
     372              : /** A naive implementation might just call set for every index between the
     373              : start and start + count. However, calculating the block and index within each
     374              : block for every call to set costs a division and a modulo operation. This also
     375              : loads and stores a block multiple times just to set each bit within a block to
     376              : the same value. We can avoid this by handling the first and last block with one
     377              : operations and then handling everything in between with a bulk memset. */
     378              : CCC_Result
     379        10452 : CCC_flat_bitset_set_range(
     380              :     CCC_Flat_bitset *const bitset,
     381              :     size_t const range_start_index,
     382              :     size_t const range_bit_count,
     383              :     CCC_Tribool const b
     384              : ) {
     385        10452 :     size_t const range_end = range_start_index + range_bit_count;
     386        10452 :     if (!bitset || !range_bit_count || range_start_index >= bitset->count
     387        10452 :         || range_end < range_start_index || range_end > bitset->count) {
     388            1 :         return CCC_RESULT_ARGUMENT_ERROR;
     389              :     }
     390        10451 :     Block_count start_block = block_count_index(range_start_index);
     391        10451 :     Bit_count const start_bit = bit_count_index(range_start_index);
     392        10451 :     Bit_block range_mask = leading_ones_mask(BLOCK_BITS - start_bit);
     393        10451 :     if (start_bit + range_bit_count < BLOCK_BITS) {
     394              :         range_mask
     395         1687 :             &= trailing_ones_mask((Bit_count)(start_bit + range_bit_count));
     396         1687 :     }
     397              : 
     398              :     /* Logic is uniform except for key lines to turn bits on or off. */
     399        10451 :     b ? (bitset->blocks[start_block] |= range_mask)
     400         4201 :       : (bitset->blocks[start_block] &= ~range_mask);
     401              : 
     402        10451 :     Block_count const end_block = block_count_index(range_end - 1);
     403        10451 :     if (end_block == start_block) {
     404         1972 :         fix_end(bitset);
     405         1972 :         return CCC_RESULT_OK;
     406              :     }
     407         8479 :     if (++start_block != end_block) {
     408         5766 :         int const v = b ? ~0 : 0;
     409        11532 :         (void)memset(
     410         5766 :             &bitset->blocks[start_block],
     411         5766 :             v,
     412         5766 :             (end_block - start_block) * SIZEOF_BLOCK
     413              :         );
     414         5766 :     }
     415              :     /* Need the final included bit position modulo block size but then feed to
     416              :        mask creation function as a count. */
     417        16958 :     Bit_block const last_block_mask
     418         8479 :         = trailing_ones_mask(bit_count_index(range_end - 1) + 1);
     419              : 
     420         8479 :     b ? (bitset->blocks[end_block] |= last_block_mask)
     421         3248 :       : (bitset->blocks[end_block] &= ~last_block_mask);
     422              : 
     423         8479 :     fix_end(bitset);
     424         8479 :     return CCC_RESULT_OK;
     425        10452 : }
     426              : 
     427              : CCC_Tribool
     428            4 : CCC_flat_bitset_reset(CCC_Flat_bitset *const bitset, size_t const i) {
     429            4 :     if (!bitset) {
     430            1 :         return CCC_TRIBOOL_ERROR;
     431              :     }
     432            3 :     if (i >= bitset->count) {
     433            1 :         return CCC_TRIBOOL_ERROR;
     434              :     }
     435            2 :     Bit_block *const block = block_at(bitset, i);
     436            2 :     CCC_Tribool const was = status(block, i);
     437            2 :     *block &= ~on(i);
     438            2 :     fix_end(bitset);
     439            2 :     return was;
     440            4 : }
     441              : 
     442              : CCC_Result
     443           11 : CCC_flat_bitset_reset_all(CCC_Flat_bitset *const bitset) {
     444           11 :     if (!bitset) {
     445            1 :         return CCC_RESULT_ARGUMENT_ERROR;
     446              :     }
     447           10 :     if (bitset->count) {
     448           10 :         (void)memset(
     449           10 :             bitset->blocks, CCC_FALSE, block_count(bitset->count) * SIZEOF_BLOCK
     450              :         );
     451           10 :     }
     452           10 :     return CCC_RESULT_OK;
     453           11 : }
     454              : 
     455              : /** Same concept as set range but easier. Handle first and last then set
     456              : everything in between to false with memset. */
     457              : CCC_Result
     458         2050 : CCC_flat_bitset_reset_range(
     459              :     CCC_Flat_bitset *const bitset,
     460              :     size_t const range_start_index,
     461              :     size_t const range_bit_count
     462              : ) {
     463         2050 :     size_t const range_end = range_start_index + range_bit_count;
     464         2050 :     if (!bitset || !range_bit_count || range_start_index >= bitset->count
     465         2049 :         || range_end < range_start_index || range_end > bitset->count) {
     466            1 :         return CCC_RESULT_ARGUMENT_ERROR;
     467              :     }
     468         2049 :     Block_count start_block = block_count_index(range_start_index);
     469         2049 :     Bit_count const start_bit = bit_count_index(range_start_index);
     470         2049 :     Bit_block first_block_mask = leading_ones_mask(BLOCK_BITS - start_bit);
     471         2049 :     if (start_bit + range_bit_count < BLOCK_BITS) {
     472              :         first_block_mask
     473           33 :             &= trailing_ones_mask((Bit_count)(start_bit + range_bit_count));
     474           33 :     }
     475         2049 :     bitset->blocks[start_block] &= ~first_block_mask;
     476         2049 :     Block_count const end_block = block_count_index(range_end - 1);
     477         2049 :     if (end_block == start_block) {
     478           66 :         fix_end(bitset);
     479           66 :         return CCC_RESULT_OK;
     480              :     }
     481         1983 :     if (++start_block != end_block) {
     482         1792 :         (void)memset(
     483         1792 :             &bitset->blocks[start_block],
     484              :             CCC_FALSE,
     485         1792 :             (end_block - start_block) * SIZEOF_BLOCK
     486              :         );
     487         1792 :     }
     488         3966 :     Bit_block const last_block_mask
     489         1983 :         = trailing_ones_mask(bit_count_index(range_end - 1) + 1);
     490         1983 :     bitset->blocks[end_block] &= ~last_block_mask;
     491         1983 :     fix_end(bitset);
     492         1983 :     return CCC_RESULT_OK;
     493         2050 : }
     494              : 
     495              : CCC_Tribool
     496            4 : CCC_flat_bitset_flip(CCC_Flat_bitset *const bitset, size_t const i) {
     497            4 :     if (!bitset || i > bitset->count) {
     498            2 :         return CCC_TRIBOOL_ERROR;
     499              :     }
     500            2 :     Block_count const b_i = block_count_index(i);
     501            2 :     Bit_block *const block = &bitset->blocks[b_i];
     502            2 :     CCC_Tribool const was = status(block, i);
     503            2 :     *block ^= on(i);
     504            2 :     fix_end(bitset);
     505            2 :     return was;
     506            4 : }
     507              : 
     508              : CCC_Result
     509            3 : CCC_flat_bitset_flip_all(CCC_Flat_bitset *const bitset) {
     510            3 :     if (!bitset) {
     511            1 :         return CCC_RESULT_ARGUMENT_ERROR;
     512              :     }
     513            2 :     if (!bitset->count) {
     514            1 :         return CCC_RESULT_OK;
     515              :     }
     516            1 :     Block_count const end = block_count(bitset->count);
     517            2 :     for (size_t i = 0; i < end; ++i) {
     518            1 :         bitset->blocks[i] = ~bitset->blocks[i];
     519            1 :     }
     520            1 :     fix_end(bitset);
     521            1 :     return CCC_RESULT_OK;
     522            3 : }
     523              : 
     524              : /** Maybe future SIMD vectorization could speed things up here because we use
     525              : the same strat of handling first and last which just leaves a simpler bulk
     526              : operation in the middle. But we don't benefit from memset here. */
     527              : CCC_Result
     528         2563 : CCC_flat_bitset_flip_range(
     529              :     CCC_Flat_bitset *const bitset,
     530              :     size_t const range_start_index,
     531              :     size_t const range_bit_count
     532              : ) {
     533         2563 :     size_t const range_end = range_start_index + range_bit_count;
     534         2563 :     if (!bitset || !range_bit_count || range_start_index >= bitset->count
     535         2562 :         || range_end < range_start_index || range_end > bitset->count) {
     536            3 :         return CCC_RESULT_ARGUMENT_ERROR;
     537              :     }
     538         2560 :     Block_count start_block = block_count_index(range_start_index);
     539         2560 :     Bit_count const start_bit = bit_count_index(range_start_index);
     540         2560 :     Bit_block first_block_on = leading_ones_mask(BLOCK_BITS - start_bit);
     541         2560 :     if (start_bit + range_bit_count < BLOCK_BITS) {
     542              :         first_block_on
     543           62 :             &= trailing_ones_mask((Bit_count)(start_bit + range_bit_count));
     544           62 :     }
     545         2560 :     bitset->blocks[start_block] ^= first_block_on;
     546         2560 :     Block_count const end_block = block_count_index(range_end - 1);
     547         2560 :     if (end_block == start_block) {
     548          128 :         fix_end(bitset);
     549          128 :         return CCC_RESULT_OK;
     550              :     }
     551        19456 :     while (++start_block < end_block) {
     552        17024 :         bitset->blocks[start_block] = ~bitset->blocks[start_block];
     553              :     }
     554         4864 :     Bit_block const last_block_mask
     555         2432 :         = trailing_ones_mask(bit_count_index(range_end - 1) + 1);
     556         2432 :     bitset->blocks[end_block] ^= last_block_mask;
     557         2432 :     fix_end(bitset);
     558         2432 :     return CCC_RESULT_OK;
     559         2563 : }
     560              : 
     561              : CCC_Count
     562           76 : CCC_flat_bitset_capacity(CCC_Flat_bitset const *const bitset) {
     563           76 :     if (!bitset) {
     564            1 :         return (CCC_Count){.error = CCC_RESULT_ARGUMENT_ERROR};
     565              :     }
     566           75 :     return (CCC_Count){.count = bitset->capacity};
     567           76 : }
     568              : 
     569              : CCC_Count
     570            2 : CCC_flat_bitset_blocks_capacity(CCC_Flat_bitset const *const bitset) {
     571            2 :     if (!bitset) {
     572            1 :         return (CCC_Count){.error = CCC_RESULT_ARGUMENT_ERROR};
     573              :     }
     574            2 :     return (CCC_Count){
     575            1 :         .count = block_count(bitset->capacity),
     576              :     };
     577            2 : }
     578              : 
     579              : CCC_Count
     580         1679 : CCC_flat_bitset_count(CCC_Flat_bitset const *const bitset) {
     581         1679 :     if (!bitset) {
     582            1 :         return (CCC_Count){.error = CCC_RESULT_ARGUMENT_ERROR};
     583              :     }
     584         1678 :     return (CCC_Count){.count = bitset->count};
     585         1679 : }
     586              : 
     587              : CCC_Count
     588            2 : CCC_flat_bitset_blocks_count(CCC_Flat_bitset const *const bitset) {
     589            2 :     if (!bitset) {
     590            1 :         return (CCC_Count){.error = CCC_RESULT_ARGUMENT_ERROR};
     591              :     }
     592            2 :     return (CCC_Count){
     593            1 :         .count = block_count(bitset->count),
     594              :     };
     595            2 : }
     596              : 
     597              : CCC_Tribool
     598         2304 : CCC_flat_bitset_is_empty(CCC_Flat_bitset const *const bitset) {
     599         2304 :     if (!bitset) {
     600            1 :         return CCC_TRIBOOL_ERROR;
     601              :     }
     602         2303 :     return !bitset->count;
     603         2304 : }
     604              : 
     605              : CCC_Count
     606         9297 : CCC_flat_bitset_popcount(CCC_Flat_bitset const *const bitset) {
     607         9297 :     if (!bitset) {
     608            1 :         return (CCC_Count){.error = CCC_RESULT_ARGUMENT_ERROR};
     609              :     }
     610         9296 :     if (!bitset->count) {
     611            9 :         return (CCC_Count){.count = 0};
     612              :     }
     613         9287 :     Block_count const end = block_count(bitset->count);
     614         9287 :     size_t count = 0;
     615       157390 :     for (Block_count i = 0; i < end; ++i) {
     616       148103 :         count += popcount(bitset->blocks[i]);
     617       148103 :     }
     618         9287 :     return (CCC_Count){.count = count};
     619         9297 : }
     620              : 
     621              : CCC_Count
     622         9220 : CCC_flat_bitset_popcount_range(
     623              :     CCC_Flat_bitset const *const bitset,
     624              :     size_t const range_start_index,
     625              :     size_t const range_bit_count
     626              : ) {
     627         9220 :     size_t const range_end = range_start_index + range_bit_count;
     628         9220 :     if (!bitset || !range_bit_count || range_start_index >= bitset->count
     629         9218 :         || range_end < range_start_index || range_end > bitset->count) {
     630            2 :         return (CCC_Count){.error = CCC_RESULT_ARGUMENT_ERROR};
     631              :     }
     632         9218 :     size_t popped = 0;
     633         9218 :     Block_count start_block = block_count_index(range_start_index);
     634         9218 :     Bit_count const start_bit = bit_count_index(range_start_index);
     635         9218 :     Bit_block first_block_mask = leading_ones_mask(BLOCK_BITS - start_bit);
     636         9218 :     if (start_bit + range_bit_count < BLOCK_BITS) {
     637              :         first_block_mask
     638          188 :             &= trailing_ones_mask((Bit_count)(start_bit + range_bit_count));
     639          188 :     }
     640         9218 :     popped += popcount(first_block_mask & bitset->blocks[start_block]);
     641         9218 :     Block_count const end_block = block_count_index(range_end - 1);
     642         9218 :     if (end_block == start_block) {
     643          388 :         return (CCC_Count){.count = popped};
     644              :     }
     645        70640 :     while (++start_block < end_block) {
     646        61810 :         popped += popcount(bitset->blocks[start_block]);
     647              :     }
     648        17660 :     Bit_block const last_block_mask
     649         8830 :         = trailing_ones_mask(bit_count_index(range_end - 1) + 1);
     650         8830 :     popped += popcount(last_block_mask & bitset->blocks[end_block]);
     651         8830 :     return (CCC_Count){.count = popped};
     652         9220 : }
     653              : 
     654              : CCC_Result
     655         1700 : CCC_flat_bitset_push_back(
     656              :     CCC_Flat_bitset *const bitset,
     657              :     CCC_Tribool const b,
     658              :     CCC_Allocator const *const allocator
     659              : ) {
     660         1700 :     if (!bitset || !allocator || b > CCC_TRUE || b < CCC_FALSE) {
     661            3 :         return CCC_RESULT_ARGUMENT_ERROR;
     662              :     }
     663         1697 :     CCC_Result const check_resize = maybe_resize(bitset, 1, allocator);
     664         1697 :     if (check_resize != CCC_RESULT_OK) {
     665            3 :         return check_resize;
     666              :     }
     667         1694 :     ++bitset->count;
     668         1694 :     set(block_at(bitset, bitset->count - 1), bitset->count - 1, b);
     669         1694 :     fix_end(bitset);
     670         1694 :     return CCC_RESULT_OK;
     671         1700 : }
     672              : 
     673              : CCC_Tribool
     674         2279 : CCC_flat_bitset_pop_back(CCC_Flat_bitset *const bitset) {
     675         2279 :     if (!bitset || !bitset->count) {
     676            1 :         return CCC_TRIBOOL_ERROR;
     677              :     }
     678         4556 :     CCC_Tribool const was
     679         2278 :         = status(block_at(bitset, bitset->count - 1), bitset->count - 1);
     680         2278 :     --bitset->count;
     681         2278 :     fix_end(bitset);
     682         2278 :     return was;
     683         2279 : }
     684              : 
     685              : CCC_Tribool
     686         1035 : CCC_flat_bitset_any_range(
     687              :     CCC_Flat_bitset const *const bitset,
     688              :     size_t const range_start_index,
     689              :     size_t const range_bit_count
     690              : ) {
     691         1035 :     return any_or_none_range(
     692         1035 :         bitset, range_start_index, range_bit_count, CCC_TRUE
     693              :     );
     694              : }
     695              : 
     696              : CCC_Tribool
     697          512 : CCC_flat_bitset_any(CCC_Flat_bitset const *const bitset) {
     698          512 :     return any_or_none_range(bitset, 0, bitset->count, CCC_TRUE);
     699              : }
     700              : 
     701              : CCC_Tribool
     702          512 : CCC_flat_bitset_none_range(
     703              :     CCC_Flat_bitset const *const bitset,
     704              :     size_t const range_start_index,
     705              :     size_t const range_bit_count
     706              : ) {
     707          512 :     return any_or_none_range(
     708          512 :         bitset, range_start_index, range_bit_count, CCC_FALSE
     709              :     );
     710              : }
     711              : 
     712              : CCC_Tribool
     713          512 : CCC_flat_bitset_none(CCC_Flat_bitset const *const bitset) {
     714          512 :     return any_or_none_range(bitset, 0, bitset->count, CCC_FALSE);
     715              : }
     716              : 
     717              : CCC_Tribool
     718          524 : CCC_flat_bitset_all_range(
     719              :     CCC_Flat_bitset const *const bitset,
     720              :     size_t const range_start_index,
     721              :     size_t const range_bit_count
     722              : ) {
     723          524 :     return all_range(bitset, range_start_index, range_bit_count);
     724              : }
     725              : 
     726              : CCC_Tribool
     727          513 : CCC_flat_bitset_all(CCC_Flat_bitset const *const bitset) {
     728          513 :     return all_range(bitset, 0, bitset->count);
     729              : }
     730              : 
     731              : CCC_Count
     732         1029 : CCC_flat_bitset_first_trailing_one_range(
     733              :     CCC_Flat_bitset const *const bitset,
     734              :     size_t const range_start_index,
     735              :     size_t const range_bit_count
     736              : ) {
     737         1029 :     return first_trailing_bit_range(
     738         1029 :         bitset, range_start_index, range_bit_count, CCC_TRUE
     739              :     );
     740         1029 : }
     741              : 
     742              : CCC_Count
     743          511 : CCC_flat_bitset_first_trailing_one(CCC_Flat_bitset const *const bitset) {
     744          511 :     return first_trailing_bit_range(bitset, 0, bitset->count, CCC_TRUE);
     745          511 : }
     746              : 
     747              : CCC_Count
     748         4289 : CCC_flat_bitset_first_trailing_ones(
     749              :     CCC_Flat_bitset const *const bitset, size_t const ones_count
     750              : ) {
     751         4289 :     return first_trailing_bits_range(
     752         4289 :         bitset, 0, bitset->count, ones_count, CCC_TRUE
     753              :     );
     754         4289 : }
     755              : 
     756              : CCC_Count
     757         4325 : CCC_flat_bitset_first_trailing_ones_range(
     758              :     CCC_Flat_bitset const *const bitset,
     759              :     size_t const range_start_index,
     760              :     size_t const range_bit_count,
     761              :     size_t const ones_count
     762              : ) {
     763         4325 :     return first_trailing_bits_range(
     764         4325 :         bitset, range_start_index, range_bit_count, ones_count, CCC_TRUE
     765              :     );
     766         4325 : }
     767              : 
     768              : CCC_Count
     769         1022 : CCC_flat_bitset_first_trailing_zero_range(
     770              :     CCC_Flat_bitset const *const bitset,
     771              :     size_t const range_start_index,
     772              :     size_t const range_bit_count
     773              : ) {
     774         1022 :     return first_trailing_bit_range(
     775         1022 :         bitset, range_start_index, range_bit_count, CCC_FALSE
     776              :     );
     777         1022 : }
     778              : 
     779              : CCC_Count
     780          511 : CCC_flat_bitset_first_trailing_zero(CCC_Flat_bitset const *const bitset) {
     781          511 :     return first_trailing_bit_range(bitset, 0, bitset->count, CCC_FALSE);
     782          511 : }
     783              : 
     784              : CCC_Count
     785         4289 : CCC_flat_bitset_first_trailing_zeros(
     786              :     CCC_Flat_bitset const *const bitset, size_t const zeros_count
     787              : ) {
     788         4289 :     return first_trailing_bits_range(
     789         4289 :         bitset, 0, bitset->count, zeros_count, CCC_FALSE
     790              :     );
     791         4289 : }
     792              : 
     793              : CCC_Count
     794         4322 : CCC_flat_bitset_first_trailing_zeros_range(
     795              :     CCC_Flat_bitset const *const bitset,
     796              :     size_t const range_start_index,
     797              :     size_t const range_bit_count,
     798              :     size_t const zeros_count
     799              : ) {
     800         4322 :     return first_trailing_bits_range(
     801         4322 :         bitset, range_start_index, range_bit_count, zeros_count, CCC_FALSE
     802              :     );
     803         4322 : }
     804              : 
     805              : CCC_Count
     806         1035 : CCC_flat_bitset_first_leading_one_range(
     807              :     CCC_Flat_bitset const *const bitset,
     808              :     size_t const range_start_index,
     809              :     size_t const range_bit_count
     810              : ) {
     811         1035 :     return first_leading_bit_range(
     812         1035 :         bitset, range_start_index, range_bit_count, CCC_TRUE
     813              :     );
     814         1035 : }
     815              : 
     816              : CCC_Count
     817          512 : CCC_flat_bitset_first_leading_one(CCC_Flat_bitset const *const bitset) {
     818          512 :     return first_leading_bit_range(bitset, 0, bitset->count, CCC_TRUE);
     819          512 : }
     820              : 
     821              : CCC_Count
     822         4280 : CCC_flat_bitset_first_leading_ones(
     823              :     CCC_Flat_bitset const *const bitset, size_t const ones_count
     824              : ) {
     825         4280 :     return first_leading_bits_range(
     826         4280 :         bitset, 0, bitset->count, ones_count, CCC_TRUE
     827              :     );
     828         4280 : }
     829              : 
     830              : CCC_Count
     831         4316 : CCC_flat_bitset_first_leading_ones_range(
     832              :     CCC_Flat_bitset const *const bitset,
     833              :     size_t const range_start_index,
     834              :     size_t const range_bit_count,
     835              :     size_t const ones_count
     836              : ) {
     837         4316 :     return first_leading_bits_range(
     838         4316 :         bitset, range_start_index, range_bit_count, ones_count, CCC_TRUE
     839              :     );
     840         4316 : }
     841              : 
     842              : CCC_Count
     843         1029 : CCC_flat_bitset_first_leading_zero_range(
     844              :     CCC_Flat_bitset const *const bitset,
     845              :     size_t const range_start_index,
     846              :     size_t const range_bit_count
     847              : ) {
     848         1029 :     return first_leading_bit_range(
     849         1029 :         bitset, range_start_index, range_bit_count, CCC_FALSE
     850              :     );
     851         1029 : }
     852              : 
     853              : CCC_Count
     854          512 : CCC_flat_bitset_first_leading_zero(CCC_Flat_bitset const *const bitset) {
     855          512 :     return first_leading_bit_range(bitset, 0, bitset->count, CCC_FALSE);
     856          512 : }
     857              : 
     858              : CCC_Count
     859         4280 : CCC_flat_bitset_first_leading_zeros(
     860              :     CCC_Flat_bitset const *const bitset, size_t const zeros_count
     861              : ) {
     862         4280 :     return first_leading_bits_range(
     863         4280 :         bitset, 0, bitset->count, zeros_count, CCC_FALSE
     864              :     );
     865         4280 : }
     866              : 
     867              : CCC_Count
     868         4313 : CCC_flat_bitset_first_leading_zeros_range(
     869              :     CCC_Flat_bitset const *const bitset,
     870              :     size_t const range_start_index,
     871              :     size_t const range_bit_count,
     872              :     size_t const zeros_count
     873              : ) {
     874         4313 :     return first_leading_bits_range(
     875         4313 :         bitset, range_start_index, range_bit_count, zeros_count, CCC_FALSE
     876              :     );
     877         4313 : }
     878              : 
     879              : CCC_Result
     880            6 : CCC_flat_bitset_clear(CCC_Flat_bitset *const bitset) {
     881            6 :     if (!bitset) {
     882            1 :         return CCC_RESULT_ARGUMENT_ERROR;
     883              :     }
     884            5 :     if (bitset->blocks) {
     885            5 :         assert(bitset->capacity);
     886            5 :         (void)memset(
     887            5 :             bitset->blocks,
     888              :             CCC_FALSE,
     889            5 :             block_count(bitset->capacity) * SIZEOF_BLOCK
     890              :         );
     891            5 :     }
     892            5 :     bitset->count = 0;
     893            5 :     return CCC_RESULT_OK;
     894            6 : }
     895              : 
     896              : CCC_Result
     897           11 : CCC_flat_bitset_clear_and_free(
     898              :     CCC_Flat_bitset *const bitset, CCC_Allocator const *const allocator
     899              : ) {
     900           11 :     if (!bitset || !allocator) {
     901            2 :         return CCC_RESULT_ARGUMENT_ERROR;
     902              :     }
     903            9 :     if (!allocator->allocate) {
     904            5 :         return CCC_RESULT_NO_ALLOCATION_FUNCTION;
     905              :     }
     906            4 :     if (bitset->blocks) {
     907            9 :         (void)allocator->allocate((CCC_Allocator_arguments){
     908            3 :             .input = bitset->blocks,
     909              :             .bytes = 0,
     910            3 :             .context = allocator->context,
     911              :         });
     912            3 :     }
     913            4 :     bitset->count = 0;
     914            4 :     bitset->capacity = 0;
     915            4 :     bitset->blocks = NULL;
     916            4 :     return CCC_RESULT_OK;
     917           11 : }
     918              : 
     919              : CCC_Result
     920           49 : CCC_flat_bitset_reserve(
     921              :     CCC_Flat_bitset *const bitset,
     922              :     size_t const to_add,
     923              :     CCC_Allocator const *const allocator
     924              : ) {
     925           49 :     if (!bitset || !allocator || !allocator->allocate || !to_add) {
     926            3 :         return CCC_RESULT_ARGUMENT_ERROR;
     927              :     }
     928           46 :     return maybe_resize(bitset, to_add, allocator);
     929           49 : }
     930              : 
     931              : CCC_Result
     932            8 : CCC_flat_bitset_copy(
     933              :     CCC_Flat_bitset *const destination,
     934              :     CCC_Flat_bitset const *const source,
     935              :     CCC_Allocator const *const allocator
     936              : ) {
     937            8 :     if (!destination || !source || !allocator
     938            6 :         || (destination->capacity < source->capacity && !allocator->allocate)) {
     939            3 :         return CCC_RESULT_ARGUMENT_ERROR;
     940              :     }
     941            5 :     if (!source->capacity) {
     942            1 :         destination->count = 0;
     943            1 :         return CCC_RESULT_OK;
     944              :     }
     945            4 :     if (destination->capacity < source->capacity) {
     946            6 :         Bit_block *const new_data
     947           12 :             = allocator->allocate((CCC_Allocator_arguments){
     948            3 :                 .input = destination->blocks,
     949            3 :                 .bytes = block_count(source->capacity) * SIZEOF_BLOCK,
     950            3 :                 .context = allocator->context,
     951              :             });
     952            3 :         if (!new_data) {
     953            1 :             return CCC_RESULT_ALLOCATOR_ERROR;
     954              :         }
     955            2 :         destination->blocks = new_data;
     956            2 :         destination->capacity = source->capacity;
     957            3 :     }
     958            3 :     if (!source->blocks || !destination->blocks) {
     959            1 :         return CCC_RESULT_ARGUMENT_ERROR;
     960              :     }
     961            2 :     destination->count = source->count;
     962            2 :     (void)memcpy(
     963            2 :         destination->blocks,
     964            2 :         source->blocks,
     965            2 :         block_count(source->capacity) * SIZEOF_BLOCK
     966              :     );
     967            2 :     fix_end(destination);
     968            2 :     return CCC_RESULT_OK;
     969            8 : }
     970              : 
     971              : void *
     972            2 : CCC_flat_bitset_data(CCC_Flat_bitset const *const bitset) {
     973            2 :     if (!bitset) {
     974            1 :         return NULL;
     975              :     }
     976            1 :     return bitset->blocks;
     977            2 : }
     978              : 
     979              : CCC_Tribool
     980            7 : CCC_flat_bitset_is_equal(
     981              :     CCC_Flat_bitset const *const left, CCC_Flat_bitset const *const right
     982              : ) {
     983            7 :     if (!left || !right) {
     984            2 :         return CCC_TRIBOOL_ERROR;
     985              :     }
     986            5 :     if (left->count != right->count) {
     987            1 :         return CCC_FALSE;
     988              :     }
     989            4 :     if (!left->count) {
     990            1 :         return CCC_TRUE;
     991              :     }
     992            6 :     return memcmp(
     993            3 :                left->blocks,
     994            3 :                right->blocks,
     995            3 :                block_count(left->count) * SIZEOF_BLOCK
     996              :            )
     997            3 :         == 0;
     998            7 : }
     999              : 
    1000              : /*=========================     Private Interface   =========================*/
    1001              : 
    1002              : CCC_Result
    1003           46 : CCC_private_flat_bitset_reserve(
    1004              :     struct CCC_Flat_bitset *const bitset,
    1005              :     size_t const to_add,
    1006              :     CCC_Allocator const *const allocator
    1007              : ) {
    1008           46 :     return CCC_flat_bitset_reserve(bitset, to_add, allocator);
    1009              : }
    1010              : 
    1011              : CCC_Tribool
    1012         2508 : CCC_private_flat_bitset_set(
    1013              :     struct CCC_Flat_bitset *const bitset,
    1014              :     size_t const index,
    1015              :     CCC_Tribool const bit
    1016              : ) {
    1017         2508 :     return CCC_flat_bitset_set(bitset, index, bit);
    1018              : }
    1019              : 
    1020              : /*=======================    Static Helpers    ==============================*/
    1021              : 
    1022              : /** Assumes set size is greater than or equal to subset size. */
    1023              : static inline CCC_Tribool
    1024            9 : is_subset_of(
    1025              :     struct CCC_Flat_bitset const *const subset,
    1026              :     struct CCC_Flat_bitset const *const set
    1027              : ) {
    1028            9 :     assert(set->count >= subset->count);
    1029           69 :     for (Block_count i = 0, end = block_count(subset->count); i < end; ++i) {
    1030              :         /* Invariant: the last N unused bits in a set zero so this works. */
    1031           60 :         if (!is_mask_match(set->blocks[i], subset->blocks[i])) {
    1032            4 :             return CCC_FALSE;
    1033              :         }
    1034           56 :     }
    1035            5 :     return CCC_TRUE;
    1036            9 : }
    1037              : 
    1038              : static CCC_Result
    1039         1743 : maybe_resize(
    1040              :     struct CCC_Flat_bitset *const bitset,
    1041              :     size_t const to_add,
    1042              :     CCC_Allocator const *const allocator
    1043              : ) {
    1044         1743 :     size_t bits_needed = bitset->count + to_add;
    1045         1743 :     if (bits_needed <= bitset->capacity) {
    1046         1692 :         return CCC_RESULT_OK;
    1047              :     }
    1048           51 :     if (!allocator->allocate) {
    1049            3 :         return CCC_RESULT_NO_ALLOCATION_FUNCTION;
    1050              :     }
    1051           48 :     if (to_add == 1) {
    1052            2 :         bits_needed <<= 1;
    1053            2 :     }
    1054              :     static_assert(
    1055              :         (BLOCK_BITS & (BLOCK_BITS - 1)) == 0,
    1056              :         "rounding trick only works for powers of 2"
    1057              :     );
    1058           96 :     size_t const new_capacity
    1059           48 :         = (bits_needed + (BLOCK_BITS - 1)) & ~((size_t)BLOCK_BITS - 1);
    1060           48 :     if (new_capacity <= bitset->capacity) {
    1061            1 :         return CCC_RESULT_ARGUMENT_ERROR;
    1062              :     }
    1063           94 :     size_t const new_bytes
    1064           47 :         = block_count(new_capacity - bitset->count) * SIZEOF_BLOCK;
    1065           94 :     size_t const old_bytes
    1066           47 :         = bitset->count ? block_count(bitset->count) * SIZEOF_BLOCK : 0;
    1067          188 :     Bit_block *const new_data = allocator->allocate((CCC_Allocator_arguments){
    1068           47 :         .input = bitset->blocks,
    1069           47 :         .bytes = block_count(new_capacity) * SIZEOF_BLOCK,
    1070           47 :         .context = allocator->context,
    1071              :     });
    1072           47 :     if (!new_data) {
    1073            1 :         return CCC_RESULT_ALLOCATOR_ERROR;
    1074              :     }
    1075           46 :     (void)memset((char *)new_data + old_bytes, 0, new_bytes);
    1076           46 :     bitset->capacity = new_capacity;
    1077           46 :     bitset->blocks = new_data;
    1078           46 :     return CCC_RESULT_OK;
    1079         1743 : }
    1080              : 
    1081              : /** A trailing bit in a range is the first bit set to the specified boolean
    1082              : value in the provided range. The input i gives the starting bit of the search,
    1083              : meaning a bit within a block that is the inclusive start of the range. The count
    1084              : gives us the end of the search for an overall range of `[i, i + count)`. This
    1085              : means if the search range is greater than a single block we will iterate in
    1086              : ascending order through our blocks and from least to most significant bit within
    1087              : each block. */
    1088              : static CCC_Count
    1089         3073 : first_trailing_bit_range(
    1090              :     struct CCC_Flat_bitset const *const bitset,
    1091              :     size_t const i,
    1092              :     size_t const count,
    1093              :     CCC_Tribool const is_one
    1094              : ) {
    1095         3073 :     size_t const exclusive_end = i + count;
    1096         3073 :     if (!bitset || !count || i >= bitset->count || exclusive_end < i
    1097         3072 :         || exclusive_end > bitset->count) {
    1098            1 :         return (CCC_Count){.error = CCC_RESULT_ARGUMENT_ERROR};
    1099              :     }
    1100         3072 :     Block_count start_block = block_count_index(i);
    1101         3072 :     Bit_count const start_bit = bit_count_index(i);
    1102         3072 :     Bit_block first_block_mask = leading_ones_mask(BLOCK_BITS - start_bit);
    1103         3072 :     if (start_bit + count < BLOCK_BITS) {
    1104           65 :         first_block_mask &= trailing_ones_mask((Bit_count)(start_bit + count));
    1105           65 :     }
    1106         6144 :     Bit_count trailing_zeros = count_trailing_zeros(
    1107         3072 :         first_block_mask
    1108         3072 :         & (is_one ? bitset->blocks[start_block] : ~bitset->blocks[start_block])
    1109              :     );
    1110         3072 :     if (trailing_zeros != BLOCK_BITS) {
    1111         2112 :         return (CCC_Count){
    1112         1056 :             .count = (start_block * BLOCK_BITS) + trailing_zeros,
    1113              :         };
    1114              :     }
    1115         2016 :     Block_count const end_block = block_count_index(exclusive_end - 1);
    1116         2016 :     if (end_block == start_block) {
    1117           66 :         return (CCC_Count){.error = CCC_RESULT_FAIL};
    1118              :     }
    1119        15364 :     while (++start_block < end_block) {
    1120        14338 :         trailing_zeros = count_trailing_zeros(
    1121        14338 :             is_one ? bitset->blocks[start_block] : ~bitset->blocks[start_block]
    1122              :         );
    1123        14338 :         if (trailing_zeros != BLOCK_BITS) {
    1124         1848 :             return (CCC_Count){
    1125          924 :                 .count = (start_block * BLOCK_BITS) + trailing_zeros,
    1126              :             };
    1127              :         }
    1128              :     }
    1129         2052 :     Bit_block const last_block_mask
    1130         1026 :         = trailing_ones_mask(bit_count_index(exclusive_end - 1) + 1);
    1131         1026 :     trailing_zeros = count_trailing_zeros(
    1132         1026 :         last_block_mask
    1133         1026 :         & (is_one ? bitset->blocks[end_block] : ~bitset->blocks[end_block])
    1134              :     );
    1135         1026 :     if (trailing_zeros != BLOCK_BITS) {
    1136          134 :         return (CCC_Count){
    1137           67 :             .count = (end_block * BLOCK_BITS) + trailing_zeros,
    1138              :         };
    1139              :     }
    1140          959 :     return (CCC_Count){.error = CCC_RESULT_FAIL};
    1141         3073 : }
    1142              : 
    1143              : /** Finds the starting index of a sequence of 1's or 0's of the num_bits size in
    1144              : linear time. The algorithm aims to efficiently skip as many bits as possible
    1145              : while searching for the desired group. This avoids both an O(N^2) runtime and
    1146              : the use of any unnecessary modulo or division operations in a hot loop. */
    1147              : static CCC_Count
    1148        17225 : first_trailing_bits_range( /* NOLINT (*cognitive-complexity) */
    1149              :     struct CCC_Flat_bitset const *const bitset,
    1150              :     size_t const i,
    1151              :     size_t const count,
    1152              :     size_t const num_bits,
    1153              :     CCC_Tribool const ones
    1154              : ) {
    1155        17225 :     size_t const exclusive_end = i + count;
    1156        17225 :     if (!bitset || !count || !num_bits || i >= bitset->count || num_bits > count
    1157        17218 :         || exclusive_end < i || exclusive_end > bitset->count) {
    1158          209 :         return (CCC_Count){.error = CCC_RESULT_ARGUMENT_ERROR};
    1159              :     }
    1160        17016 :     size_t bit_count = 0;
    1161        17016 :     size_t window_start = i;
    1162        17016 :     Block_count block_index = block_count_index(i);
    1163        17016 :     size_t window_end = (block_index * BLOCK_BITS) + BLOCK_BITS;
    1164        17016 :     Bit_count bit_index = bit_count_index(i);
    1165        34032 :     Bit_block bits
    1166        17016 :         = ones ? bitset->blocks[block_index] : ~bitset->blocks[block_index];
    1167        17016 :     bits &= leading_ones_mask(BLOCK_BITS - bit_index);
    1168       125970 :     for (;;) {
    1169       125970 :         if (window_end > exclusive_end) {
    1170         6317 :             bits &= trailing_ones_mask(bit_count_index(exclusive_end - 1) + 1);
    1171         6317 :         }
    1172       125970 :         if (!bits) {
    1173        97187 :             window_start = (block_index + 1) * BLOCK_BITS;
    1174        97187 :             bit_count = 0;
    1175        97187 :         } else {
    1176        28783 :             size_t bits_remain = num_bits - bit_count;
    1177              :             /* I would rather check for a prefix that could be completing in
    1178              :                this block here than check for a failure and reset to number of
    1179              :                bits on every iteration of the next loop. Think of this like loop
    1180              :                unrolling while also making next block simpler. */
    1181        28783 :             if (bits_remain <= BLOCK_BITS && bits_remain < num_bits) {
    1182        10068 :                 assert(bit_index < BLOCK_BITS && "shifts are valid for block");
    1183        10068 :                 Bit_block const shifted_bits = bits >> bit_index;
    1184        10068 :                 if (is_mask_match(
    1185        10068 :                         shifted_bits, trailing_ones_mask((Bit_count)bits_remain)
    1186              :                     )) {
    1187         6052 :                     return (CCC_Count){.count = window_start};
    1188              :                 }
    1189         4016 :                 bits_remain = num_bits;
    1190         4016 :                 bit_index += count_trailing_zeros(~shifted_bits);
    1191         4016 :                 bit_count = 0;
    1192         4016 :                 window_start = (block_index * BLOCK_BITS) + bit_index;
    1193        10068 :             }
    1194        22731 :             if (bits_remain <= BLOCK_BITS) {
    1195        10269 :                 assert(bit_index < BLOCK_BITS && "shifts are valid for block");
    1196        10269 :                 Bit_block shifted_bits = bits >> bit_index;
    1197        20538 :                 Bit_block const bits_remain_mask
    1198        10269 :                     = trailing_ones_mask((Bit_count)bits_remain);
    1199              :                 /* The loop continues only while our block is numerically
    1200              :                    greater than the mask. Because unsigned integers are
    1201              :                    represented in base 2 we get two automatic early exits here.
    1202              :                        - If the block is missing a high-order bit in the
    1203              :                          required mask, it is numerically smaller than the mask
    1204              :                          and cannot match with further shifting.
    1205              :                        - If all high bits match but some lower required bits are
    1206              :                          zero, the block is numerically smaller than the mask
    1207              :                          and cannot match with further shifting.
    1208              :                    If the block has high order bits not in the mask it is
    1209              :                    greater than the mask and we continue checking, which is
    1210              :                    correct. This strategy optimizes out some useless shifts. */
    1211        54884 :                 while (shifted_bits >= bits_remain_mask) {
    1212        47162 :                     if (is_mask_match(shifted_bits, bits_remain_mask)) {
    1213         5094 :                         return (CCC_Count){
    1214         2547 :                             .count = (block_index * BLOCK_BITS) + bit_index,
    1215              :                         };
    1216              :                     }
    1217        44615 :                     ++bit_index;
    1218        44615 :                     shifted_bits >>= 1;
    1219              :                 }
    1220         7722 :                 bit_count = 0;
    1221        10269 :             }
    1222              :             /* 2 cases covered: the ones remaining are greater than this block
    1223              :                could hold or we did not find a match by the masking we just did.
    1224              :                In either case we need the maximum contiguous ones that run all
    1225              :                the way to the MSB. The best we could have is a full block of
    1226              :                1's. Otherwise we need to find where to start our new search for
    1227              :                contiguous 1's. This could be the next block if there are not 1's
    1228              :                that continue to MSB. */
    1229        20184 :             Bit_count const leading_ones = count_leading_zeros(~bits);
    1230        20184 :             bit_count += leading_ones;
    1231        20184 :             if (leading_ones < BLOCK_BITS) {
    1232              :                 window_start
    1233        15658 :                     = (block_index * BLOCK_BITS) + (BLOCK_BITS - leading_ones);
    1234        15658 :             }
    1235        28783 :         }
    1236       117371 :         if (window_start + num_bits > exclusive_end) {
    1237         8417 :             return (CCC_Count){.error = CCC_RESULT_FAIL};
    1238              :         }
    1239       108954 :         bit_index = 0;
    1240       108954 :         ++block_index;
    1241       108954 :         window_end += BLOCK_BITS;
    1242            0 :         assert(
    1243       108954 :             block_index < block_count_index(bitset->capacity)
    1244       108954 :             && "only load bits within block array capacity"
    1245              :         );
    1246              :         bits
    1247       108954 :             = ones ? bitset->blocks[block_index] : ~bitset->blocks[block_index];
    1248              :     }
    1249        17225 : }
    1250              : 
    1251              : /** A leading bit is the first bit in the range to be set to the indicated value
    1252              : within a block starting the search from the Most Significant Bit of each block.
    1253              : This means that if the range is larger than a single block we iterate in
    1254              : descending order through the set of blocks starting at `i + count
    1255              : - 1` for the range of `[i, i + count)`. The search within a given block
    1256              : proceeds from Most Significant Bit toward Least Significant Bit. */
    1257              : static CCC_Count
    1258         3088 : first_leading_bit_range(
    1259              :     struct CCC_Flat_bitset const *const bitset,
    1260              :     size_t const start_index,
    1261              :     size_t const count,
    1262              :     CCC_Tribool const is_one
    1263              : ) {
    1264         3088 :     if (!bitset || !count || start_index >= bitset->count
    1265         3088 :         || start_index + count <= start_index
    1266         3088 :         || start_index + count > bitset->count) {
    1267         1022 :         return (CCC_Count){.error = CCC_RESULT_ARGUMENT_ERROR};
    1268              :     }
    1269         2066 :     size_t const window_start = start_index + count - 1;
    1270         2066 :     Bit_count const start_bit = bit_count_index(window_start);
    1271         2066 :     Bit_count const end_bit = bit_count_index(start_index);
    1272         2066 :     Block_count start_block = block_count_index(window_start);
    1273         2066 :     Bit_block first_block_mask = trailing_ones_mask(start_bit + 1);
    1274         2066 :     if (start_index + count < BLOCK_BITS) {
    1275           76 :         first_block_mask &= leading_ones_mask(BLOCK_BITS - end_bit);
    1276           76 :     }
    1277         4132 :     Bit_count leading_zeros = count_leading_zeros(
    1278         2066 :         first_block_mask
    1279         2066 :         & (is_one ? bitset->blocks[start_block] : ~bitset->blocks[start_block])
    1280              :     );
    1281         2066 :     if (leading_zeros != BLOCK_BITS) {
    1282         2132 :         return (CCC_Count){
    1283         1066 :             .count = (start_block * BLOCK_BITS)
    1284         1066 :                    + (Block_count)(BLOCK_BITS - leading_zeros - 1),
    1285              :         };
    1286              :     }
    1287         1000 :     Block_count const end_block = block_count_index(start_index);
    1288         1000 :     if (start_block == end_block) {
    1289            4 :         return (CCC_Count){.error = CCC_RESULT_FAIL};
    1290              :     }
    1291         7774 :     while (--start_block > end_block) {
    1292         7702 :         leading_zeros = count_leading_zeros(
    1293         7702 :             is_one ? bitset->blocks[start_block] : ~bitset->blocks[start_block]
    1294              :         );
    1295         7702 :         if (leading_zeros != BLOCK_BITS) {
    1296         1848 :             return (CCC_Count){
    1297          924 :                 .count = (start_block * BLOCK_BITS)
    1298          924 :                        + (Block_count)(BLOCK_BITS - leading_zeros - 1),
    1299              :             };
    1300              :         }
    1301              :     }
    1302           72 :     Bit_block const last_block_on = leading_ones_mask(BLOCK_BITS - end_bit);
    1303           72 :     leading_zeros = count_leading_zeros(
    1304           72 :         last_block_on
    1305           72 :         & (is_one ? bitset->blocks[end_block] : ~bitset->blocks[end_block])
    1306              :     );
    1307           72 :     if (leading_zeros != BLOCK_BITS) {
    1308          138 :         return (CCC_Count){
    1309           69 :             .count = (end_block * BLOCK_BITS)
    1310           69 :                    + (Block_count)(BLOCK_BITS - leading_zeros - 1),
    1311              :         };
    1312              :     }
    1313            3 :     return (CCC_Count){.error = CCC_RESULT_FAIL};
    1314         3088 : }
    1315              : 
    1316              : /** Iterating backward from MSB to LSB requires more care to avoid unsigned
    1317              : integer wrapping. Therefore, this code is not identical to the trailing version
    1318              : due to a few more branches. This function previously used signed types to avoid
    1319              : this branching but that required making new signed types, copious casting, and
    1320              : verification of input to be within ptrdiff_t limits. For consistency and
    1321              : portability I think committing to unsigned is better for this function. */
    1322              : static CCC_Count
    1323        17189 : first_leading_bits_range( /* NOLINT (*cognitive-complexity) */
    1324              :     struct CCC_Flat_bitset const *const bitset,
    1325              :     size_t const index,
    1326              :     size_t const range_count,
    1327              :     size_t const bits_required,
    1328              :     CCC_Tribool const ones
    1329              : ) {
    1330        17189 :     if (!bitset || !range_count || !bits_required || index >= bitset->count
    1331        17188 :         || bits_required > range_count) {
    1332            1 :         return (CCC_Count){.error = CCC_RESULT_ARGUMENT_ERROR};
    1333              :     }
    1334        17188 :     size_t bit_count = 0;
    1335        17188 :     size_t window_start = (index + range_count - 1);
    1336        17188 :     Block_count block_index = block_count_index(window_start);
    1337        17188 :     Block_count window_inclusive_end = ((block_index * BLOCK_BITS));
    1338        17188 :     Bit_count bit_index = bit_count_index(window_start);
    1339        34376 :     Bit_block bits
    1340        17188 :         = ones ? bitset->blocks[block_index] : ~bitset->blocks[block_index];
    1341        17188 :     bits &= trailing_ones_mask(bit_index + 1);
    1342       130737 :     for (;;) {
    1343       130737 :         if (window_inclusive_end < index) {
    1344         5545 :             bits &= leading_ones_mask(BLOCK_BITS - bit_count_index(index));
    1345         5545 :         }
    1346       130737 :         if (!bits) {
    1347        96355 :             window_start = block_index ? (block_index * BLOCK_BITS) - 1 : 0;
    1348        96355 :             bit_count = 0;
    1349        96355 :         } else {
    1350        34382 :             size_t bits_remain = bits_required - bit_count;
    1351              :             /* I would rather check for a prefix that could be completing in
    1352              :                this block here than check for a failure and reset to number of
    1353              :                bits on every iteration of the next loop. Think of this like loop
    1354              :                unrolling while also making next block simpler. */
    1355        34382 :             if (bits_remain <= BLOCK_BITS && bits_remain < bits_required) {
    1356        11987 :                 assert(bit_index < BLOCK_BITS && "shifts are valid for block");
    1357        23974 :                 Bit_block const shifted_block = bits
    1358        11987 :                                              << (BLOCK_BITS - bit_index - 1);
    1359        11987 :                 if (is_mask_match(
    1360        11987 :                         shifted_block, leading_ones_mask((Bit_count)bits_remain)
    1361              :                     )) {
    1362         6038 :                     return (CCC_Count){.count = window_start};
    1363              :                 }
    1364         5949 :                 bits_remain = bits_required;
    1365        11898 :                 Bit_count const leading_ones
    1366         5949 :                     = count_leading_zeros(~shifted_block);
    1367         5949 :                 assert(bit_index >= leading_ones && "index cannot underflow");
    1368         5949 :                 bit_index -= leading_ones;
    1369         5949 :                 window_start = (block_index * BLOCK_BITS) + bit_index;
    1370         5949 :                 bit_count = 0;
    1371        11987 :             }
    1372        28344 :             if (bits_remain <= BLOCK_BITS) {
    1373        13287 :                 assert(bit_index < BLOCK_BITS && "shifts are valid for block");
    1374        13287 :                 Bit_block shifted_block = bits << (BLOCK_BITS - bit_index - 1);
    1375        26574 :                 Bit_block const bits_remain_mask
    1376        13287 :                     = leading_ones_mask((Bit_count)bits_remain);
    1377              :                 /* Can't find a clever way to reduce shifts like trailing. */
    1378        13287 :                 Bit_count const end_index = (Bit_count)(bits_remain - 1);
    1379       103330 :                 while (bit_index >= end_index) {
    1380        92586 :                     if (is_mask_match(shifted_block, bits_remain_mask)) {
    1381         5086 :                         return (CCC_Count){
    1382         2543 :                             .count = (block_index * BLOCK_BITS) + bit_index,
    1383              :                         };
    1384              :                     }
    1385        90043 :                     --bit_index;
    1386        90043 :                     shifted_block <<= 1;
    1387              :                 }
    1388        10744 :                 bit_count = 0;
    1389        13287 :             }
    1390        25801 :             Bit_count const trailing_ones = count_trailing_zeros(~bits);
    1391        25801 :             bit_count += trailing_ones;
    1392        25801 :             if (trailing_ones < BLOCK_BITS) {
    1393        20402 :                 window_start = (block_index * BLOCK_BITS) + trailing_ones;
    1394        20402 :                 if (window_start) {
    1395        19876 :                     --window_start;
    1396        19876 :                 }
    1397        20402 :             }
    1398        34382 :         }
    1399       122156 :         if (window_start < index + bits_required - 1) {
    1400         8607 :             return (CCC_Count){.error = CCC_RESULT_FAIL};
    1401              :         }
    1402       113549 :         bit_index = BLOCK_BITS - 1;
    1403       113549 :         --block_index;
    1404       113549 :         window_inclusive_end = window_inclusive_end >= BLOCK_BITS
    1405       113549 :                                  ? window_inclusive_end - BLOCK_BITS
    1406              :                                  : 0;
    1407            0 :         assert(
    1408       113549 :             block_index < block_count_index(bitset->capacity)
    1409       113549 :             && "current block within range while iterating toward LSB"
    1410              :         );
    1411              :         bits
    1412       113549 :             = ones ? bitset->blocks[block_index] : ~bitset->blocks[block_index];
    1413              :     }
    1414        17189 : }
    1415              : 
    1416              : /** Performs the any or none scan operation over the specified range. The only
    1417              : difference between the operations is the return value. Specify the desired
    1418              : Tribool value to return upon encountering an on bit. For any this is CCC_TRUE.
    1419              : For none this is CCC_FALSE. Saves writing two identical fns. */
    1420              : static CCC_Tribool
    1421         2571 : any_or_none_range(
    1422              :     struct CCC_Flat_bitset const *const bitset,
    1423              :     size_t const i,
    1424              :     size_t const count,
    1425              :     CCC_Tribool const any_or_none
    1426              : ) {
    1427         2571 :     size_t const range_end = i + count;
    1428         2571 :     if (!bitset || !count || i >= bitset->count || range_end < i
    1429         2570 :         || range_end > bitset->count || any_or_none < CCC_FALSE
    1430         2570 :         || any_or_none > CCC_TRUE) {
    1431            1 :         return CCC_TRIBOOL_ERROR;
    1432              :     }
    1433         2570 :     Block_count start_block = block_count_index(i);
    1434         2570 :     Bit_count const start_bit = bit_count_index(i);
    1435         2570 :     Bit_block first_block_mask = leading_ones_mask(BLOCK_BITS - start_bit);
    1436         2570 :     if (start_bit + count < BLOCK_BITS) {
    1437           18 :         first_block_mask &= trailing_ones_mask((Bit_count)(start_bit + count));
    1438           18 :     }
    1439         2570 :     if (first_block_mask & bitset->blocks[start_block]) {
    1440          166 :         return any_or_none;
    1441              :     }
    1442         2404 :     Block_count const end_block = block_count_index(range_end - 1);
    1443         2404 :     if (end_block == start_block) {
    1444           18 :         return !any_or_none;
    1445              :     }
    1446              :     /* If this is the any check we might get lucky by checking the last
    1447              :        block before looping over everything. */
    1448         4772 :     Bit_block const last_block_mask
    1449         2386 :         = trailing_ones_mask(bit_count_index(range_end - 1) + 1);
    1450         2386 :     if (last_block_mask & bitset->blocks[end_block]) {
    1451          226 :         return any_or_none;
    1452              :     }
    1453        20864 :     for (++start_block; start_block < end_block; ++start_block) {
    1454        19600 :         if (bitset->blocks[start_block] & BLOCK_ON) {
    1455          896 :             return any_or_none;
    1456              :         }
    1457        18704 :     }
    1458         1264 :     return !any_or_none;
    1459         2571 : }
    1460              : 
    1461              : /** Check for all on is slightly different from the any or none checks so we
    1462              : need a painfully repetitive function. */
    1463              : static CCC_Tribool
    1464         1037 : all_range(
    1465              :     struct CCC_Flat_bitset const *const bitset,
    1466              :     size_t const i,
    1467              :     size_t const count
    1468              : ) {
    1469         1037 :     size_t const range_end = i + count;
    1470         1037 :     if (!bitset || !count || i >= bitset->count || range_end < i
    1471         1036 :         || range_end > bitset->count) {
    1472            1 :         return CCC_TRIBOOL_ERROR;
    1473              :     }
    1474         1036 :     Block_count start_block = block_count_index(i);
    1475         1036 :     Bit_count const start_bit = bit_count_index(i);
    1476         1036 :     Bit_block first_block_mask = leading_ones_mask(BLOCK_BITS - start_bit);
    1477         1036 :     if (start_bit + count < BLOCK_BITS) {
    1478            5 :         first_block_mask &= trailing_ones_mask((Bit_count)(start_bit + count));
    1479            5 :     }
    1480         1036 :     if ((first_block_mask & bitset->blocks[start_block]) != first_block_mask) {
    1481          771 :         return CCC_FALSE;
    1482              :     }
    1483          265 :     Block_count const end_block = block_count_index(range_end - 1);
    1484          265 :     if (end_block == start_block) {
    1485            4 :         return CCC_TRUE;
    1486              :     }
    1487         2079 :     while (++start_block < end_block) {
    1488         1819 :         if (bitset->blocks[start_block] != BLOCK_ON) {
    1489            1 :             return CCC_FALSE;
    1490              :         }
    1491              :     }
    1492          520 :     Bit_block const last_block_mask
    1493          260 :         = trailing_ones_mask(bit_count_index(range_end - 1) + 1);
    1494          260 :     return is_mask_match(bitset->blocks[end_block], last_block_mask);
    1495         1037 : }
    1496              : 
    1497              : /** Given the 0 based index from `[0, count of used bits in set)` returns a
    1498              : reference to the block that such a bit belongs to. This block reference will
    1499              : point to some block at index [0, count of blocks used in the set). */
    1500              : static inline Bit_block *
    1501        40900 : block_at(
    1502              :     struct CCC_Flat_bitset const *const bitset, size_t const flat_bitset_index
    1503              : ) {
    1504        40900 :     return &bitset->blocks[block_count_index(flat_bitset_index)];
    1505              : }
    1506              : 
    1507              : /** Sets all bits in bulk to value b and fixes the end block to ensure all bits
    1508              : in the final block that are not in use are zeroed out. */
    1509              : static inline void
    1510           36 : set_all(struct CCC_Flat_bitset *const bitset, CCC_Tribool const b) {
    1511           36 :     int const v = b ? ~0 : 0;
    1512           36 :     (void)memset(bitset->blocks, v, block_count(bitset->count) * SIZEOF_BLOCK);
    1513           36 :     fix_end(bitset);
    1514           36 : }
    1515              : 
    1516              : /** Given the appropriate block in which bit_i resides, sets the bits position
    1517              : to 0 or 1 as specified by the CCC_Tribool argument b.
    1518              : 
    1519              : Assumes block has been retrieved correctly in range [0, count of blocks in set)
    1520              : and that bit_i is in range [0, count of active bits in set). */
    1521              : static inline void
    1522        15412 : set(Bit_block *const block,
    1523              :     size_t const flat_bitset_index,
    1524              :     CCC_Tribool const b) {
    1525        15412 :     if (b) {
    1526         8986 :         *block |= on(flat_bitset_index);
    1527         8986 :     } else {
    1528         6426 :         *block &= ~on(flat_bitset_index);
    1529              :     }
    1530        15412 : }
    1531              : 
    1532              : /** Given the bit set and the set index--set index is allowed to be greater than
    1533              : the size of one block--returns the status of the bit at that index. */
    1534              : static inline CCC_Tribool
    1535        20136 : status(Bit_block const *const bitset, size_t const flat_bitset_index) {
    1536              :     /* Be careful. The & op does not promise to evaluate to 1 or 0. We often
    1537              :        just use it where that conversion takes place implicitly for us. */
    1538        20136 :     return (*bitset & on(flat_bitset_index)) != 0;
    1539              : }
    1540              : 
    1541              : /** Given the true bit index in the bit set, expected to be in the range
    1542              : [0, count of active bits in set), returns a Bit_block mask with only this bit
    1543              : on in block to which it belongs. This mask guarantees to have a bit on within
    1544              : a bit block at index [0, BIT_BLOCK_BITS - 1). */
    1545              : static inline Bit_block
    1546        35552 : on(size_t flat_bitset_index) {
    1547        35552 :     return (Bit_block)1 << bit_count_index(flat_bitset_index);
    1548              : }
    1549              : 
    1550              : /** Clears unused bits in the last block according to count. Sets the last block
    1551              : to have only the used bits set to their given values and all bits after to zero.
    1552              : This is used as a safety mechanism throughout the code after complex operations
    1553              : on bit blocks to ensure any side effects on unused bits are deleted. */
    1554              : static inline void
    1555        19092 : fix_end(struct CCC_Flat_bitset *const bitset) {
    1556        19092 :     bitset->count
    1557        19072 :         ? (*block_at(bitset, bitset->count - 1)
    1558        38144 :            &= trailing_ones_mask(bit_count_index(bitset->count - 1) + 1))
    1559           20 :         : (bitset->blocks[0] = 0);
    1560        19092 : }
    1561              : 
    1562              : /** Returns the 0-based index of the block in the block array allocation to
    1563              : which the given index belongs. Assumes the given index is somewhere between [0,
    1564              : count of bits set). The returned index then represents the block in which this
    1565              : index resides which is in the range [0, block containing last in use bit). */
    1566              : static inline Block_count
    1567       360618 : block_count_index(size_t const flat_bitset_index) {
    1568              :     static_assert(
    1569              :         (typeof(flat_bitset_index))~((typeof(flat_bitset_index))0)
    1570              :             >= (typeof(flat_bitset_index))0,
    1571              :         "shifting to avoid division with power of 2 divisor is only "
    1572              :         "defined for unsigned types"
    1573              :     );
    1574       360618 :     return flat_bitset_index >> BLOCK_BITS_LOG2;
    1575              : }
    1576              : 
    1577              : /** Returns the 0-based index within a block to which the given index belongs.
    1578              : This index will always be between [0, BIT_BLOCK_BITS - 1). */
    1579              : static inline Bit_count
    1580       161186 : bit_count_index(size_t const flat_bitset_index) {
    1581       161186 :     return flat_bitset_index & (BLOCK_BITS - 1);
    1582              : }
    1583              : 
    1584              : /** Returns the number of blocks required to store the given bits. Assumes bits
    1585              : is non-zero. For any bits > 1 the block count is always less than bits.*/
    1586              : static inline Block_count
    1587         9459 : block_count(size_t const bit_count) {
    1588              :     static_assert(
    1589              :         (typeof(bit_count))~((typeof(bit_count))0) >= (typeof(bit_count))0,
    1590              :         "shifting to avoid division with power of 2 divisor is only "
    1591              :         "defined for unsigned types"
    1592              :     );
    1593         9459 :     assert(bit_count && "calculating block count for non-empty bitset");
    1594         9459 :     return (bit_count + (BLOCK_BITS - 1)) >> BLOCK_BITS_LOG2;
    1595              : }
    1596              : 
    1597              : /** Returns min of size_t arguments. Beware of conversions. */
    1598              : static inline size_t
    1599            6 : size_t_min(size_t const a, size_t const b) {
    1600            6 :     return a < b ? a : b;
    1601              : }
    1602              : 
    1603              : /** Returns true if the on bit mask is present in the block. All one bits in the
    1604              : mask must be found at the same positions in the block being queried. */
    1605              : static inline CCC_Tribool
    1606       162123 : is_mask_match(Bit_block const block, Bit_block const on_mask) {
    1607       162123 :     return (block & on_mask) == on_mask;
    1608              : }
    1609              : 
    1610              : /** Returns a mask of the specified count of trailing bits set to 1 (CCC_TRUE)
    1611              : within a bit block. This is the same integer type that is stored in the bit set
    1612              : integer block array. Trailing ones are ones starting from index 0, the Least
    1613              : Significant Bit, counting toward the Most Significant Bit of the block. A count
    1614              : of zero will return 0. A count equivalent to the block bit width will return a
    1615              : mask with all bits set to 1. */
    1616              : static inline Bit_block
    1617        92434 : trailing_ones_mask(Bit_count const ones_count) {
    1618        92434 :     assert(ones_count <= BLOCK_BITS && "shift is well defined for mask");
    1619        92434 :     return ones_count ? BLOCK_ON >> (BLOCK_BITS - ones_count) : 0;
    1620              : }
    1621              : 
    1622              : /** Returns a mask of the specified count of leading bits set to 1 (CCC_TRUE)
    1623              : within a bit block. This is the same integer type that is stored in the bit set
    1624              : integer block array. Leading ones are ones starting from index BLOCK_BITS - 1,
    1625              : the Most Significant Bit, counting toward 0, the Least Significant Bit, of the
    1626              : block. A count of zero will return 0. A count equivalent to the block bit width
    1627              : will return a mask with all bits set to 1. */
    1628              : static inline Bit_block
    1629        78939 : leading_ones_mask(Bit_count const ones_count) {
    1630        78939 :     assert(ones_count <= BLOCK_BITS && "shift is well defined for mask");
    1631        78939 :     return ones_count ? BLOCK_ON << (BLOCK_BITS - ones_count) : 0;
    1632              : }
    1633              : 
    1634              : /** The following asserts assure that whether portable or built in bit
    1635              : operations are used in the coming section we are safe in our assumptions about
    1636              : widths and counts. */
    1637              : static_assert(
    1638              :     BLOCK_MSB < BLOCK_ON, "most significant bit is set for correct block width"
    1639              : );
    1640              : static_assert(
    1641              :     SIZEOF_BLOCK == sizeof(unsigned),
    1642              :     "builtins remain in sync with bitset block width"
    1643              : );
    1644              : 
    1645              : /** Much of the code relies on the assumption that iterating over blocks at
    1646              : at a time is faster than using mathematical operations to conceptually iterate
    1647              : over bits. This assumptions mostly comes from the use of these built-ins to
    1648              : keep the processing time linear for range based queries, while avoiding division
    1649              : and modulo operations. I should test to see the performance implications when
    1650              : these built-ins are gone. However they are pretty ubiquitous these days. */
    1651              : 
    1652              : /** Built-ins are common on Clang and GCC but we have portable fallback. */
    1653              : #if defined(__has_builtin) && __has_builtin(__builtin_ctz)                     \
    1654              :     && __has_builtin(__builtin_clz) && __has_builtin(__builtin_popcount)
    1655              : 
    1656              : /** Counts the on bits in a bit block. */
    1657              : static inline Bit_count
    1658       227961 : popcount(Bit_block const b) {
    1659              :     /* There are different pop counts for different integer widths. Be sure
    1660              :        to catch the use of the wrong one by mistake here at compile time. */
    1661              :     static_assert(
    1662              :         __builtin_popcount((Bit_block)~0) <= U8_BLOCK_MAX,
    1663              :         "builtins return counts that are valid for smaller width types we use"
    1664              :     );
    1665       227961 :     return (Bit_count)__builtin_popcount(b);
    1666              : }
    1667              : 
    1668              : /** Counts the number of trailing zeros in a bit block starting from least
    1669              : significant bit. */
    1670              : static inline Bit_count
    1671        48253 : count_trailing_zeros(Bit_block const b) {
    1672              :     static_assert(
    1673              :         __builtin_ctz(BLOCK_MSB) <= U8_BLOCK_MAX,
    1674              :         "builtins return counts that are valid for smaller width types we use"
    1675              :     );
    1676        48253 :     return b ? (Bit_count)__builtin_ctz(b) : BLOCK_BITS;
    1677              : }
    1678              : 
    1679              : /** Counts the leading zeros in a bit block starting from the most significant
    1680              : bit. */
    1681              : static inline Bit_count
    1682        35973 : count_leading_zeros(Bit_block const b) {
    1683              :     static_assert(
    1684              :         __builtin_clz((Bit_block)1) <= U8_BLOCK_MAX,
    1685              :         "builtins return counts that are valid for smaller width types we use"
    1686              :     );
    1687        35973 :     return b ? (Bit_count)__builtin_clz(b) : BLOCK_BITS;
    1688              : }
    1689              : 
    1690              : #else /* !defined(__has_builtin) || !__has_builtin(__builtin_ctz)              \
    1691              :     || !__has_builtin(__builtin_clz) || !__has_builtin(__builtin_popcount) */
    1692              : 
    1693              : /** Counts the on bits in a bit block. */
    1694              : static inline Bit_count
    1695              : popcount(Bit_block b) {
    1696              :     Bit_count cnt = 0;
    1697              :     for (; b; cnt += ((b & 1U) != 0), b >>= 1U) {}
    1698              :     return cnt;
    1699              : }
    1700              : 
    1701              : /** Counts the number of trailing zeros in a bit block starting from least
    1702              : significant bit. */
    1703              : static inline Bit_count
    1704              : count_trailing_zeros(Bit_block b) {
    1705              :     if (!b) {
    1706              :         return BIT_BLOCK_BITS;
    1707              :     }
    1708              :     Bit_count cnt = 0;
    1709              :     for (; (b & 1U) == 0; ++cnt, b >>= 1U) {}
    1710              :     return cnt;
    1711              : }
    1712              : 
    1713              : /** Counts the leading zeros in a bit block starting from the most significant
    1714              : bit. */
    1715              : static inline Bit_count
    1716              : count_leading_zeros(Bit_block b) {
    1717              :     if (!b) {
    1718              :         return BIT_BLOCK_BITS;
    1719              :     }
    1720              :     Bit_count cnt = 0;
    1721              :     for (; (b & BIT_BLOCK_MSB) == 0; ++cnt, b <<= 1U) {}
    1722              :     return cnt;
    1723              : }
    1724              : 
    1725              : #endif /* defined(__has_builtin) && __has_builtin(__builtin_ctz)               \
    1726              :     && __has_builtin(__builtin_clz) && __has_builtin(__builtin_popcount) */
        

Generated by: LCOV version 2.5.0-beta