Line data Source code
1 : /** Copyright 2025 Alexander G. Lopez
2 :
3 : Licensed under the Apache License, Version 2.0 (the "License");
4 : you may not use this file except in compliance with the License.
5 : You may obtain a copy of the License at
6 :
7 : http://www.apache.org/licenses/LICENSE-2.0
8 :
9 : Unless required by applicable law or agreed to in writing, software
10 : distributed under the License is distributed on an "AS IS" BASIS,
11 : WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 : See the License for the specific language governing permissions and
13 : limitations under the License.
14 :
15 : This file implements a Bit Set using blocks of platform defined integers
16 : for speed and efficiency. The implementation aims for constant and linear time
17 : operations at all times, specifically when implementing more complicated range
18 : based operations over the set.
19 :
20 : It is also important to avoid modulo and division operations whenever possible.
21 : This is why much of the code revolves around obtaining indices by processing
22 : entire blocks at a time, rather than using mathematical operations to
23 : conceptually iterate over individual bits.
24 :
25 : Finally, the code is able to unite most functions for finding zeros or ones
26 : into a single function that accepts true or false as an additional argument.
27 : This is because every search for a zero can be solved by bitwise inverting a
28 : block and searching for a 1 instead. This elimination of identical functions
29 : costs a single branch in the function and is worth it to avoid code duplication
30 : and bug doubling. */
31 : /** C23 provided headers. */
32 : #include <assert.h>
33 : #include <limits.h>
34 : #include <stddef.h>
35 : #include <stdint.h>
36 :
37 : /** CCC provided headers. */
38 : #include "ccc/configuration.h"
39 : #include "ccc/flat_bitset.h"
40 : #include "ccc/private/private_flat_bitset.h"
41 : #include "ccc/types.h"
42 :
43 : /*========================= Type Declarations ============================*/
44 :
45 : /** @internal The type representing the word size used for each block of the
46 : Flat_bitset. This type may vary depending on the platform in order to maximize
47 : performance, so it is given a type. The implementation then should not concern
48 : itself with the word size.
49 :
50 : The implementation may assume that the block is a standard integer width on
51 : the given platform that is compatible with all basic arithmetic and bitwise
52 : operations. If SIMD is implemented then multiple blocks may be processed under
53 : this same assumption. */
54 : typedef typeof(*(struct CCC_Flat_bitset){}.blocks) Bit_block;
55 :
56 : /** @internal Used frequently so call the builtin just once. */
57 : enum : size_t {
58 : /** @internal Bytes of a bit block to help with byte calculations. */
59 : SIZEOF_BLOCK = sizeof(Bit_block),
60 : };
61 :
62 : /** @internal Various constants to support bit block size bit ops. */
63 : enum : Bit_block {
64 : /** @internal A mask of a bit block with all bits on. */
65 : BLOCK_ON = ((Bit_block)~0),
66 : /** @internal The Most Significant Bit of a bit block turned on to 1. */
67 : BLOCK_MSB = (((Bit_block)1) << (((SIZEOF_BLOCK * CHAR_BIT)) - 1)),
68 : };
69 :
70 : /** @internal An index into the block array or count of bit blocks. The block
71 : array is constructed by the number of blocks required to support the current bit
72 : set capacity. Assume this index type has range [0, block count to support N
73 : bits].
74 :
75 : User input is given as a `size_t` so distinguish from that input with this type
76 : to make it clear to the reader the index refers to a block not the given bit
77 : index the user has provided. */
78 : typedef size_t Block_count;
79 :
80 : /** @internal An index within a block. A block is set to some number of bits
81 : as determined by the type used for each block. This type is intended to count
82 : bits in a block and therefore cannot count up to arbitrary indices. Assume its
83 : range is `[0, BIT_BLOCK_BITS]`, for ease of use and clarity.
84 :
85 : There are many types of indexing and counting that take place in a bit set so
86 : use this type specifically for counting bits in a block so the reader is clear
87 : on intent. */
88 : typedef uint8_t Bit_count;
89 :
90 : enum : Bit_count {
91 : /** @internal How many total bits that fit in a bit block. */
92 : BLOCK_BITS = (SIZEOF_BLOCK * CHAR_BIT),
93 : /** @internal Used for static assert clarity. */
94 : U8_BLOCK_MAX = UINT8_MAX,
95 : /** @internal Hand coded log2 of block bits to avoid division. */
96 : BLOCK_BITS_LOG2 = 5,
97 : };
98 : static_assert(
99 : (BLOCK_BITS & (BLOCK_BITS - 1)) == 0,
100 : "the number of bits in a block is always a power of two, "
101 : "helping avoid division and modulo operations when possible"
102 : );
103 : static_assert(
104 : BLOCK_BITS >> BLOCK_BITS_LOG2 == 1,
105 : "hand coded log2 of bitblock bits is always correct"
106 : );
107 : static_assert(
108 : (Bit_count) ~((Bit_count)0) >= (Bit_count)0, "Bit_count must be unsigned"
109 : );
110 : static_assert(UINT8_MAX >= BLOCK_BITS, "Bit_count counts all block bits.");
111 :
112 : /*========================= Prototypes ============================*/
113 :
114 : static size_t block_count_index(size_t);
115 : static Bit_block *block_at(struct CCC_Flat_bitset const *, size_t);
116 : static void set(Bit_block *, size_t, CCC_Tribool);
117 : static Bit_block on(size_t);
118 : static void fix_end(struct CCC_Flat_bitset *);
119 : static CCC_Tribool status(Bit_block const *, size_t);
120 : static size_t block_count(size_t);
121 : static CCC_Tribool
122 : any_or_none_range(struct CCC_Flat_bitset const *, size_t, size_t, CCC_Tribool);
123 : static CCC_Tribool all_range(struct CCC_Flat_bitset const *, size_t, size_t);
124 : static CCC_Count first_trailing_bit_range(
125 : struct CCC_Flat_bitset const *, size_t, size_t, CCC_Tribool
126 : );
127 : static CCC_Count first_leading_bit_range(
128 : struct CCC_Flat_bitset const *, size_t, size_t, CCC_Tribool
129 : );
130 : static CCC_Count first_trailing_bits_range(
131 : struct CCC_Flat_bitset const *, size_t, size_t, size_t, CCC_Tribool
132 : );
133 : static CCC_Count first_leading_bits_range(
134 : struct CCC_Flat_bitset const *, size_t, size_t, size_t, CCC_Tribool
135 : );
136 : static CCC_Result
137 : maybe_resize(struct CCC_Flat_bitset *, size_t, CCC_Allocator const *);
138 : static size_t size_t_min(size_t, size_t);
139 : static CCC_Tribool is_mask_match(Bit_block, Bit_block);
140 : static Bit_block trailing_ones_mask(Bit_count);
141 : static Bit_block leading_ones_mask(Bit_count);
142 : static void set_all(struct CCC_Flat_bitset *, CCC_Tribool);
143 : static Bit_count bit_count_index(size_t);
144 : static CCC_Tribool
145 : is_subset_of(struct CCC_Flat_bitset const *, struct CCC_Flat_bitset const *);
146 : static Bit_count popcount(Bit_block);
147 : static Bit_count count_trailing_zeros(Bit_block);
148 : static Bit_count count_leading_zeros(Bit_block);
149 :
150 : /*======================= Public Interface ==============================*/
151 :
152 : CCC_Tribool
153 4 : CCC_flat_bitset_is_proper_subset(
154 : CCC_Flat_bitset const *const subset, CCC_Flat_bitset const *const set
155 : ) {
156 4 : if (!set || !subset) {
157 2 : return CCC_TRIBOOL_ERROR;
158 : }
159 2 : if (set->count <= subset->count) {
160 1 : return CCC_FALSE;
161 : }
162 1 : return is_subset_of(subset, set);
163 4 : }
164 :
165 : CCC_Tribool
166 11 : CCC_flat_bitset_is_subset(
167 : CCC_Flat_bitset const *const subset, CCC_Flat_bitset const *const set
168 : ) {
169 11 : if (!set || !subset) {
170 2 : return CCC_TRIBOOL_ERROR;
171 : }
172 9 : if (set->count < subset->count) {
173 1 : return CCC_FALSE;
174 : }
175 8 : return is_subset_of(subset, set);
176 11 : }
177 :
178 : CCC_Result
179 8 : CCC_flat_bitset_or(
180 : CCC_Flat_bitset *const destination, CCC_Flat_bitset const *const source
181 : ) {
182 8 : if (!destination || !source) {
183 2 : return CCC_RESULT_ARGUMENT_ERROR;
184 : }
185 6 : if (!destination->count || !source->count) {
186 4 : return CCC_RESULT_OK;
187 : }
188 4 : Block_count const end_block
189 2 : = block_count(size_t_min(destination->count, source->count));
190 26 : for (size_t b = 0; b < end_block; ++b) {
191 24 : destination->blocks[b] |= source->blocks[b];
192 24 : }
193 2 : fix_end(destination);
194 2 : return CCC_RESULT_OK;
195 8 : }
196 :
197 : CCC_Result
198 6 : CCC_flat_bitset_xor(
199 : CCC_Flat_bitset *const destination, CCC_Flat_bitset const *const source
200 : ) {
201 6 : if (!destination || !source) {
202 2 : return CCC_RESULT_ARGUMENT_ERROR;
203 : }
204 4 : if (!destination->count || !source->count) {
205 2 : return CCC_RESULT_OK;
206 : }
207 4 : Block_count const end_block
208 2 : = block_count(size_t_min(destination->count, source->count));
209 26 : for (Block_count b = 0; b < end_block; ++b) {
210 24 : destination->blocks[b] ^= source->blocks[b];
211 24 : }
212 2 : fix_end(destination);
213 2 : return CCC_RESULT_OK;
214 6 : }
215 :
216 : CCC_Result
217 6 : CCC_flat_bitset_and(
218 : CCC_Flat_bitset *destination, CCC_Flat_bitset const *source
219 : ) {
220 6 : if (!destination || !source) {
221 2 : return CCC_RESULT_ARGUMENT_ERROR;
222 : }
223 4 : if (!source->count) {
224 1 : set_all(destination, CCC_FALSE);
225 1 : return CCC_RESULT_OK;
226 : }
227 3 : if (!destination->count) {
228 1 : return CCC_RESULT_OK;
229 : }
230 4 : Block_count const end_block
231 2 : = block_count(size_t_min(destination->count, source->count));
232 26 : for (Block_count b = 0; b < end_block; ++b) {
233 24 : destination->blocks[b] &= source->blocks[b];
234 24 : }
235 2 : if (destination->count <= source->count) {
236 1 : return CCC_RESULT_OK;
237 : }
238 : /* The source widens to align with destination as integers would; same
239 : consequences. */
240 1 : Block_count const destination_blocks = block_count(destination->count);
241 1 : Block_count const remain = destination_blocks - end_block;
242 2 : (void)memset(
243 2 : destination->blocks + end_block, CCC_FALSE, remain * SIZEOF_BLOCK
244 : );
245 1 : fix_end(destination);
246 1 : return CCC_RESULT_OK;
247 6 : }
248 :
249 : CCC_Result
250 9 : CCC_flat_bitset_shift_left(
251 : CCC_Flat_bitset *const bitset, size_t const left_shifts
252 : ) {
253 9 : if (!bitset) {
254 1 : return CCC_RESULT_ARGUMENT_ERROR;
255 : }
256 8 : if (!bitset->count || !left_shifts) {
257 1 : return CCC_RESULT_OK;
258 : }
259 7 : if (left_shifts >= bitset->count) {
260 1 : set_all(bitset, CCC_FALSE);
261 1 : return CCC_RESULT_OK;
262 : }
263 6 : Block_count const end = block_count_index(bitset->count - 1);
264 6 : Block_count const blocks = block_count_index(left_shifts);
265 6 : Bit_count const split = bit_count_index(left_shifts);
266 6 : if (!split) {
267 31 : for (Block_count shift = end - blocks + 1, write = end; shift--;
268 29 : --write) {
269 29 : bitset->blocks[write] = bitset->blocks[shift];
270 29 : }
271 2 : } else {
272 4 : Bit_count const remain = BLOCK_BITS - split;
273 32 : for (Block_count shift = end - blocks, write = end; shift > 0;
274 28 : --shift, --write) {
275 56 : bitset->blocks[write] = (bitset->blocks[shift] << split)
276 28 : | (bitset->blocks[shift - 1] >> remain);
277 28 : }
278 4 : bitset->blocks[blocks] = bitset->blocks[0] << split;
279 4 : }
280 : /* Zero fills in lower bits just as an integer shift would. */
281 26 : for (Block_count i = 0; i < blocks; ++i) {
282 20 : bitset->blocks[i] = 0;
283 20 : }
284 6 : fix_end(bitset);
285 6 : return CCC_RESULT_OK;
286 9 : }
287 :
288 : CCC_Result
289 9 : CCC_flat_bitset_shift_right(
290 : CCC_Flat_bitset *const bitset, size_t const right_shifts
291 : ) {
292 9 : if (!bitset) {
293 1 : return CCC_RESULT_ARGUMENT_ERROR;
294 : }
295 8 : if (!bitset->count || !right_shifts) {
296 1 : return CCC_RESULT_OK;
297 : }
298 7 : if (right_shifts >= bitset->count) {
299 1 : set_all(bitset, CCC_FALSE);
300 1 : return CCC_RESULT_OK;
301 : }
302 6 : Block_count const end = block_count_index(bitset->count - 1);
303 6 : Block_count const blocks = block_count_index(right_shifts);
304 6 : Bit_count const split = bit_count_index(right_shifts);
305 6 : if (!split) {
306 31 : for (Block_count shift = blocks, write = 0; shift < end + 1;
307 29 : ++shift, ++write) {
308 29 : bitset->blocks[write] = bitset->blocks[shift];
309 29 : }
310 2 : } else {
311 4 : Bit_count const remain = BLOCK_BITS - split;
312 32 : for (Block_count shift = blocks, write = 0; shift < end;
313 28 : ++shift, ++write) {
314 56 : bitset->blocks[write] = (bitset->blocks[shift + 1] << remain)
315 28 : | (bitset->blocks[shift] >> split);
316 28 : }
317 4 : bitset->blocks[end - blocks] = bitset->blocks[end] >> split;
318 4 : }
319 : /* This is safe for a few reasons:
320 : - If shifts equals count we set all to 0 and returned early.
321 : - If we only have one block i will be equal to end and we are done.
322 : - If end is the 0th block we will stop after 1. A meaningful shift
323 : occurred in the 0th block so zeroing would be a mistake.
324 : - All other cases ensure it is safe to decrease i (no underflow).
325 : This operation emulates the zeroing of high bits on a right shift and
326 : a bit set is considered unsigned so we don't sign bit fill. */
327 26 : for (Block_count i = end; i > end - blocks; --i) {
328 20 : bitset->blocks[i] = 0;
329 20 : }
330 6 : fix_end(bitset);
331 6 : return CCC_RESULT_OK;
332 9 : }
333 :
334 : CCC_Tribool
335 4144 : CCC_flat_bitset_test(CCC_Flat_bitset const *const bitset, size_t const i) {
336 4144 : if (!bitset) {
337 1 : return CCC_TRIBOOL_ERROR;
338 : }
339 4143 : if (i >= bitset->count) {
340 7 : return CCC_TRIBOOL_ERROR;
341 : }
342 4136 : return status(block_at(bitset, i), i);
343 4144 : }
344 :
345 : CCC_Tribool
346 13721 : CCC_flat_bitset_set(
347 : CCC_Flat_bitset *const bitset, size_t const i, CCC_Tribool const b
348 : ) {
349 13721 : if (!bitset) {
350 1 : return CCC_TRIBOOL_ERROR;
351 : }
352 13720 : if (i >= bitset->count) {
353 2 : return CCC_TRIBOOL_ERROR;
354 : }
355 13718 : Bit_block *const block = block_at(bitset, i);
356 13718 : CCC_Tribool const was = status(block, i);
357 13718 : set(block, i, b);
358 13718 : return was;
359 13721 : }
360 :
361 : CCC_Result
362 34 : CCC_flat_bitset_set_all(CCC_Flat_bitset *const bitset, CCC_Tribool const b) {
363 34 : if (!bitset) {
364 1 : return CCC_RESULT_ARGUMENT_ERROR;
365 : }
366 33 : if (bitset->count) {
367 33 : set_all(bitset, b);
368 33 : }
369 33 : return CCC_RESULT_OK;
370 34 : }
371 :
372 : /** A naive implementation might just call set for every index between the
373 : start and start + count. However, calculating the block and index within each
374 : block for every call to set costs a division and a modulo operation. This also
375 : loads and stores a block multiple times just to set each bit within a block to
376 : the same value. We can avoid this by handling the first and last block with one
377 : operations and then handling everything in between with a bulk memset. */
378 : CCC_Result
379 10452 : CCC_flat_bitset_set_range(
380 : CCC_Flat_bitset *const bitset,
381 : size_t const range_start_index,
382 : size_t const range_bit_count,
383 : CCC_Tribool const b
384 : ) {
385 10452 : size_t const range_end = range_start_index + range_bit_count;
386 10452 : if (!bitset || !range_bit_count || range_start_index >= bitset->count
387 10452 : || range_end < range_start_index || range_end > bitset->count) {
388 1 : return CCC_RESULT_ARGUMENT_ERROR;
389 : }
390 10451 : Block_count start_block = block_count_index(range_start_index);
391 10451 : Bit_count const start_bit = bit_count_index(range_start_index);
392 10451 : Bit_block range_mask = leading_ones_mask(BLOCK_BITS - start_bit);
393 10451 : if (start_bit + range_bit_count < BLOCK_BITS) {
394 : range_mask
395 1687 : &= trailing_ones_mask((Bit_count)(start_bit + range_bit_count));
396 1687 : }
397 :
398 : /* Logic is uniform except for key lines to turn bits on or off. */
399 10451 : b ? (bitset->blocks[start_block] |= range_mask)
400 4201 : : (bitset->blocks[start_block] &= ~range_mask);
401 :
402 10451 : Block_count const end_block = block_count_index(range_end - 1);
403 10451 : if (end_block == start_block) {
404 1972 : fix_end(bitset);
405 1972 : return CCC_RESULT_OK;
406 : }
407 8479 : if (++start_block != end_block) {
408 5766 : int const v = b ? ~0 : 0;
409 11532 : (void)memset(
410 5766 : &bitset->blocks[start_block],
411 5766 : v,
412 5766 : (end_block - start_block) * SIZEOF_BLOCK
413 : );
414 5766 : }
415 : /* Need the final included bit position modulo block size but then feed to
416 : mask creation function as a count. */
417 16958 : Bit_block const last_block_mask
418 8479 : = trailing_ones_mask(bit_count_index(range_end - 1) + 1);
419 :
420 8479 : b ? (bitset->blocks[end_block] |= last_block_mask)
421 3248 : : (bitset->blocks[end_block] &= ~last_block_mask);
422 :
423 8479 : fix_end(bitset);
424 8479 : return CCC_RESULT_OK;
425 10452 : }
426 :
427 : CCC_Tribool
428 4 : CCC_flat_bitset_reset(CCC_Flat_bitset *const bitset, size_t const i) {
429 4 : if (!bitset) {
430 1 : return CCC_TRIBOOL_ERROR;
431 : }
432 3 : if (i >= bitset->count) {
433 1 : return CCC_TRIBOOL_ERROR;
434 : }
435 2 : Bit_block *const block = block_at(bitset, i);
436 2 : CCC_Tribool const was = status(block, i);
437 2 : *block &= ~on(i);
438 2 : fix_end(bitset);
439 2 : return was;
440 4 : }
441 :
442 : CCC_Result
443 11 : CCC_flat_bitset_reset_all(CCC_Flat_bitset *const bitset) {
444 11 : if (!bitset) {
445 1 : return CCC_RESULT_ARGUMENT_ERROR;
446 : }
447 10 : if (bitset->count) {
448 10 : (void)memset(
449 10 : bitset->blocks, CCC_FALSE, block_count(bitset->count) * SIZEOF_BLOCK
450 : );
451 10 : }
452 10 : return CCC_RESULT_OK;
453 11 : }
454 :
455 : /** Same concept as set range but easier. Handle first and last then set
456 : everything in between to false with memset. */
457 : CCC_Result
458 2050 : CCC_flat_bitset_reset_range(
459 : CCC_Flat_bitset *const bitset,
460 : size_t const range_start_index,
461 : size_t const range_bit_count
462 : ) {
463 2050 : size_t const range_end = range_start_index + range_bit_count;
464 2050 : if (!bitset || !range_bit_count || range_start_index >= bitset->count
465 2049 : || range_end < range_start_index || range_end > bitset->count) {
466 1 : return CCC_RESULT_ARGUMENT_ERROR;
467 : }
468 2049 : Block_count start_block = block_count_index(range_start_index);
469 2049 : Bit_count const start_bit = bit_count_index(range_start_index);
470 2049 : Bit_block first_block_mask = leading_ones_mask(BLOCK_BITS - start_bit);
471 2049 : if (start_bit + range_bit_count < BLOCK_BITS) {
472 : first_block_mask
473 33 : &= trailing_ones_mask((Bit_count)(start_bit + range_bit_count));
474 33 : }
475 2049 : bitset->blocks[start_block] &= ~first_block_mask;
476 2049 : Block_count const end_block = block_count_index(range_end - 1);
477 2049 : if (end_block == start_block) {
478 66 : fix_end(bitset);
479 66 : return CCC_RESULT_OK;
480 : }
481 1983 : if (++start_block != end_block) {
482 1792 : (void)memset(
483 1792 : &bitset->blocks[start_block],
484 : CCC_FALSE,
485 1792 : (end_block - start_block) * SIZEOF_BLOCK
486 : );
487 1792 : }
488 3966 : Bit_block const last_block_mask
489 1983 : = trailing_ones_mask(bit_count_index(range_end - 1) + 1);
490 1983 : bitset->blocks[end_block] &= ~last_block_mask;
491 1983 : fix_end(bitset);
492 1983 : return CCC_RESULT_OK;
493 2050 : }
494 :
495 : CCC_Tribool
496 4 : CCC_flat_bitset_flip(CCC_Flat_bitset *const bitset, size_t const i) {
497 4 : if (!bitset || i > bitset->count) {
498 2 : return CCC_TRIBOOL_ERROR;
499 : }
500 2 : Block_count const b_i = block_count_index(i);
501 2 : Bit_block *const block = &bitset->blocks[b_i];
502 2 : CCC_Tribool const was = status(block, i);
503 2 : *block ^= on(i);
504 2 : fix_end(bitset);
505 2 : return was;
506 4 : }
507 :
508 : CCC_Result
509 3 : CCC_flat_bitset_flip_all(CCC_Flat_bitset *const bitset) {
510 3 : if (!bitset) {
511 1 : return CCC_RESULT_ARGUMENT_ERROR;
512 : }
513 2 : if (!bitset->count) {
514 1 : return CCC_RESULT_OK;
515 : }
516 1 : Block_count const end = block_count(bitset->count);
517 2 : for (size_t i = 0; i < end; ++i) {
518 1 : bitset->blocks[i] = ~bitset->blocks[i];
519 1 : }
520 1 : fix_end(bitset);
521 1 : return CCC_RESULT_OK;
522 3 : }
523 :
524 : /** Maybe future SIMD vectorization could speed things up here because we use
525 : the same strat of handling first and last which just leaves a simpler bulk
526 : operation in the middle. But we don't benefit from memset here. */
527 : CCC_Result
528 2563 : CCC_flat_bitset_flip_range(
529 : CCC_Flat_bitset *const bitset,
530 : size_t const range_start_index,
531 : size_t const range_bit_count
532 : ) {
533 2563 : size_t const range_end = range_start_index + range_bit_count;
534 2563 : if (!bitset || !range_bit_count || range_start_index >= bitset->count
535 2562 : || range_end < range_start_index || range_end > bitset->count) {
536 3 : return CCC_RESULT_ARGUMENT_ERROR;
537 : }
538 2560 : Block_count start_block = block_count_index(range_start_index);
539 2560 : Bit_count const start_bit = bit_count_index(range_start_index);
540 2560 : Bit_block first_block_on = leading_ones_mask(BLOCK_BITS - start_bit);
541 2560 : if (start_bit + range_bit_count < BLOCK_BITS) {
542 : first_block_on
543 62 : &= trailing_ones_mask((Bit_count)(start_bit + range_bit_count));
544 62 : }
545 2560 : bitset->blocks[start_block] ^= first_block_on;
546 2560 : Block_count const end_block = block_count_index(range_end - 1);
547 2560 : if (end_block == start_block) {
548 128 : fix_end(bitset);
549 128 : return CCC_RESULT_OK;
550 : }
551 19456 : while (++start_block < end_block) {
552 17024 : bitset->blocks[start_block] = ~bitset->blocks[start_block];
553 : }
554 4864 : Bit_block const last_block_mask
555 2432 : = trailing_ones_mask(bit_count_index(range_end - 1) + 1);
556 2432 : bitset->blocks[end_block] ^= last_block_mask;
557 2432 : fix_end(bitset);
558 2432 : return CCC_RESULT_OK;
559 2563 : }
560 :
561 : CCC_Count
562 76 : CCC_flat_bitset_capacity(CCC_Flat_bitset const *const bitset) {
563 76 : if (!bitset) {
564 1 : return (CCC_Count){.error = CCC_RESULT_ARGUMENT_ERROR};
565 : }
566 75 : return (CCC_Count){.count = bitset->capacity};
567 76 : }
568 :
569 : CCC_Count
570 2 : CCC_flat_bitset_blocks_capacity(CCC_Flat_bitset const *const bitset) {
571 2 : if (!bitset) {
572 1 : return (CCC_Count){.error = CCC_RESULT_ARGUMENT_ERROR};
573 : }
574 2 : return (CCC_Count){
575 1 : .count = block_count(bitset->capacity),
576 : };
577 2 : }
578 :
579 : CCC_Count
580 1679 : CCC_flat_bitset_count(CCC_Flat_bitset const *const bitset) {
581 1679 : if (!bitset) {
582 1 : return (CCC_Count){.error = CCC_RESULT_ARGUMENT_ERROR};
583 : }
584 1678 : return (CCC_Count){.count = bitset->count};
585 1679 : }
586 :
587 : CCC_Count
588 2 : CCC_flat_bitset_blocks_count(CCC_Flat_bitset const *const bitset) {
589 2 : if (!bitset) {
590 1 : return (CCC_Count){.error = CCC_RESULT_ARGUMENT_ERROR};
591 : }
592 2 : return (CCC_Count){
593 1 : .count = block_count(bitset->count),
594 : };
595 2 : }
596 :
597 : CCC_Tribool
598 2304 : CCC_flat_bitset_is_empty(CCC_Flat_bitset const *const bitset) {
599 2304 : if (!bitset) {
600 1 : return CCC_TRIBOOL_ERROR;
601 : }
602 2303 : return !bitset->count;
603 2304 : }
604 :
605 : CCC_Count
606 9297 : CCC_flat_bitset_popcount(CCC_Flat_bitset const *const bitset) {
607 9297 : if (!bitset) {
608 1 : return (CCC_Count){.error = CCC_RESULT_ARGUMENT_ERROR};
609 : }
610 9296 : if (!bitset->count) {
611 9 : return (CCC_Count){.count = 0};
612 : }
613 9287 : Block_count const end = block_count(bitset->count);
614 9287 : size_t count = 0;
615 157390 : for (Block_count i = 0; i < end; ++i) {
616 148103 : count += popcount(bitset->blocks[i]);
617 148103 : }
618 9287 : return (CCC_Count){.count = count};
619 9297 : }
620 :
621 : CCC_Count
622 9220 : CCC_flat_bitset_popcount_range(
623 : CCC_Flat_bitset const *const bitset,
624 : size_t const range_start_index,
625 : size_t const range_bit_count
626 : ) {
627 9220 : size_t const range_end = range_start_index + range_bit_count;
628 9220 : if (!bitset || !range_bit_count || range_start_index >= bitset->count
629 9218 : || range_end < range_start_index || range_end > bitset->count) {
630 2 : return (CCC_Count){.error = CCC_RESULT_ARGUMENT_ERROR};
631 : }
632 9218 : size_t popped = 0;
633 9218 : Block_count start_block = block_count_index(range_start_index);
634 9218 : Bit_count const start_bit = bit_count_index(range_start_index);
635 9218 : Bit_block first_block_mask = leading_ones_mask(BLOCK_BITS - start_bit);
636 9218 : if (start_bit + range_bit_count < BLOCK_BITS) {
637 : first_block_mask
638 188 : &= trailing_ones_mask((Bit_count)(start_bit + range_bit_count));
639 188 : }
640 9218 : popped += popcount(first_block_mask & bitset->blocks[start_block]);
641 9218 : Block_count const end_block = block_count_index(range_end - 1);
642 9218 : if (end_block == start_block) {
643 388 : return (CCC_Count){.count = popped};
644 : }
645 70640 : while (++start_block < end_block) {
646 61810 : popped += popcount(bitset->blocks[start_block]);
647 : }
648 17660 : Bit_block const last_block_mask
649 8830 : = trailing_ones_mask(bit_count_index(range_end - 1) + 1);
650 8830 : popped += popcount(last_block_mask & bitset->blocks[end_block]);
651 8830 : return (CCC_Count){.count = popped};
652 9220 : }
653 :
654 : CCC_Result
655 1700 : CCC_flat_bitset_push_back(
656 : CCC_Flat_bitset *const bitset,
657 : CCC_Tribool const b,
658 : CCC_Allocator const *const allocator
659 : ) {
660 1700 : if (!bitset || !allocator || b > CCC_TRUE || b < CCC_FALSE) {
661 3 : return CCC_RESULT_ARGUMENT_ERROR;
662 : }
663 1697 : CCC_Result const check_resize = maybe_resize(bitset, 1, allocator);
664 1697 : if (check_resize != CCC_RESULT_OK) {
665 3 : return check_resize;
666 : }
667 1694 : ++bitset->count;
668 1694 : set(block_at(bitset, bitset->count - 1), bitset->count - 1, b);
669 1694 : fix_end(bitset);
670 1694 : return CCC_RESULT_OK;
671 1700 : }
672 :
673 : CCC_Tribool
674 2279 : CCC_flat_bitset_pop_back(CCC_Flat_bitset *const bitset) {
675 2279 : if (!bitset || !bitset->count) {
676 1 : return CCC_TRIBOOL_ERROR;
677 : }
678 4556 : CCC_Tribool const was
679 2278 : = status(block_at(bitset, bitset->count - 1), bitset->count - 1);
680 2278 : --bitset->count;
681 2278 : fix_end(bitset);
682 2278 : return was;
683 2279 : }
684 :
685 : CCC_Tribool
686 1035 : CCC_flat_bitset_any_range(
687 : CCC_Flat_bitset const *const bitset,
688 : size_t const range_start_index,
689 : size_t const range_bit_count
690 : ) {
691 1035 : return any_or_none_range(
692 1035 : bitset, range_start_index, range_bit_count, CCC_TRUE
693 : );
694 : }
695 :
696 : CCC_Tribool
697 512 : CCC_flat_bitset_any(CCC_Flat_bitset const *const bitset) {
698 512 : return any_or_none_range(bitset, 0, bitset->count, CCC_TRUE);
699 : }
700 :
701 : CCC_Tribool
702 512 : CCC_flat_bitset_none_range(
703 : CCC_Flat_bitset const *const bitset,
704 : size_t const range_start_index,
705 : size_t const range_bit_count
706 : ) {
707 512 : return any_or_none_range(
708 512 : bitset, range_start_index, range_bit_count, CCC_FALSE
709 : );
710 : }
711 :
712 : CCC_Tribool
713 512 : CCC_flat_bitset_none(CCC_Flat_bitset const *const bitset) {
714 512 : return any_or_none_range(bitset, 0, bitset->count, CCC_FALSE);
715 : }
716 :
717 : CCC_Tribool
718 524 : CCC_flat_bitset_all_range(
719 : CCC_Flat_bitset const *const bitset,
720 : size_t const range_start_index,
721 : size_t const range_bit_count
722 : ) {
723 524 : return all_range(bitset, range_start_index, range_bit_count);
724 : }
725 :
726 : CCC_Tribool
727 513 : CCC_flat_bitset_all(CCC_Flat_bitset const *const bitset) {
728 513 : return all_range(bitset, 0, bitset->count);
729 : }
730 :
731 : CCC_Count
732 1029 : CCC_flat_bitset_first_trailing_one_range(
733 : CCC_Flat_bitset const *const bitset,
734 : size_t const range_start_index,
735 : size_t const range_bit_count
736 : ) {
737 1029 : return first_trailing_bit_range(
738 1029 : bitset, range_start_index, range_bit_count, CCC_TRUE
739 : );
740 1029 : }
741 :
742 : CCC_Count
743 511 : CCC_flat_bitset_first_trailing_one(CCC_Flat_bitset const *const bitset) {
744 511 : return first_trailing_bit_range(bitset, 0, bitset->count, CCC_TRUE);
745 511 : }
746 :
747 : CCC_Count
748 4289 : CCC_flat_bitset_first_trailing_ones(
749 : CCC_Flat_bitset const *const bitset, size_t const ones_count
750 : ) {
751 4289 : return first_trailing_bits_range(
752 4289 : bitset, 0, bitset->count, ones_count, CCC_TRUE
753 : );
754 4289 : }
755 :
756 : CCC_Count
757 4325 : CCC_flat_bitset_first_trailing_ones_range(
758 : CCC_Flat_bitset const *const bitset,
759 : size_t const range_start_index,
760 : size_t const range_bit_count,
761 : size_t const ones_count
762 : ) {
763 4325 : return first_trailing_bits_range(
764 4325 : bitset, range_start_index, range_bit_count, ones_count, CCC_TRUE
765 : );
766 4325 : }
767 :
768 : CCC_Count
769 1022 : CCC_flat_bitset_first_trailing_zero_range(
770 : CCC_Flat_bitset const *const bitset,
771 : size_t const range_start_index,
772 : size_t const range_bit_count
773 : ) {
774 1022 : return first_trailing_bit_range(
775 1022 : bitset, range_start_index, range_bit_count, CCC_FALSE
776 : );
777 1022 : }
778 :
779 : CCC_Count
780 511 : CCC_flat_bitset_first_trailing_zero(CCC_Flat_bitset const *const bitset) {
781 511 : return first_trailing_bit_range(bitset, 0, bitset->count, CCC_FALSE);
782 511 : }
783 :
784 : CCC_Count
785 4289 : CCC_flat_bitset_first_trailing_zeros(
786 : CCC_Flat_bitset const *const bitset, size_t const zeros_count
787 : ) {
788 4289 : return first_trailing_bits_range(
789 4289 : bitset, 0, bitset->count, zeros_count, CCC_FALSE
790 : );
791 4289 : }
792 :
793 : CCC_Count
794 4322 : CCC_flat_bitset_first_trailing_zeros_range(
795 : CCC_Flat_bitset const *const bitset,
796 : size_t const range_start_index,
797 : size_t const range_bit_count,
798 : size_t const zeros_count
799 : ) {
800 4322 : return first_trailing_bits_range(
801 4322 : bitset, range_start_index, range_bit_count, zeros_count, CCC_FALSE
802 : );
803 4322 : }
804 :
805 : CCC_Count
806 1035 : CCC_flat_bitset_first_leading_one_range(
807 : CCC_Flat_bitset const *const bitset,
808 : size_t const range_start_index,
809 : size_t const range_bit_count
810 : ) {
811 1035 : return first_leading_bit_range(
812 1035 : bitset, range_start_index, range_bit_count, CCC_TRUE
813 : );
814 1035 : }
815 :
816 : CCC_Count
817 512 : CCC_flat_bitset_first_leading_one(CCC_Flat_bitset const *const bitset) {
818 512 : return first_leading_bit_range(bitset, 0, bitset->count, CCC_TRUE);
819 512 : }
820 :
821 : CCC_Count
822 4280 : CCC_flat_bitset_first_leading_ones(
823 : CCC_Flat_bitset const *const bitset, size_t const ones_count
824 : ) {
825 4280 : return first_leading_bits_range(
826 4280 : bitset, 0, bitset->count, ones_count, CCC_TRUE
827 : );
828 4280 : }
829 :
830 : CCC_Count
831 4316 : CCC_flat_bitset_first_leading_ones_range(
832 : CCC_Flat_bitset const *const bitset,
833 : size_t const range_start_index,
834 : size_t const range_bit_count,
835 : size_t const ones_count
836 : ) {
837 4316 : return first_leading_bits_range(
838 4316 : bitset, range_start_index, range_bit_count, ones_count, CCC_TRUE
839 : );
840 4316 : }
841 :
842 : CCC_Count
843 1029 : CCC_flat_bitset_first_leading_zero_range(
844 : CCC_Flat_bitset const *const bitset,
845 : size_t const range_start_index,
846 : size_t const range_bit_count
847 : ) {
848 1029 : return first_leading_bit_range(
849 1029 : bitset, range_start_index, range_bit_count, CCC_FALSE
850 : );
851 1029 : }
852 :
853 : CCC_Count
854 512 : CCC_flat_bitset_first_leading_zero(CCC_Flat_bitset const *const bitset) {
855 512 : return first_leading_bit_range(bitset, 0, bitset->count, CCC_FALSE);
856 512 : }
857 :
858 : CCC_Count
859 4280 : CCC_flat_bitset_first_leading_zeros(
860 : CCC_Flat_bitset const *const bitset, size_t const zeros_count
861 : ) {
862 4280 : return first_leading_bits_range(
863 4280 : bitset, 0, bitset->count, zeros_count, CCC_FALSE
864 : );
865 4280 : }
866 :
867 : CCC_Count
868 4313 : CCC_flat_bitset_first_leading_zeros_range(
869 : CCC_Flat_bitset const *const bitset,
870 : size_t const range_start_index,
871 : size_t const range_bit_count,
872 : size_t const zeros_count
873 : ) {
874 4313 : return first_leading_bits_range(
875 4313 : bitset, range_start_index, range_bit_count, zeros_count, CCC_FALSE
876 : );
877 4313 : }
878 :
879 : CCC_Result
880 6 : CCC_flat_bitset_clear(CCC_Flat_bitset *const bitset) {
881 6 : if (!bitset) {
882 1 : return CCC_RESULT_ARGUMENT_ERROR;
883 : }
884 5 : if (bitset->blocks) {
885 5 : assert(bitset->capacity);
886 5 : (void)memset(
887 5 : bitset->blocks,
888 : CCC_FALSE,
889 5 : block_count(bitset->capacity) * SIZEOF_BLOCK
890 : );
891 5 : }
892 5 : bitset->count = 0;
893 5 : return CCC_RESULT_OK;
894 6 : }
895 :
896 : CCC_Result
897 11 : CCC_flat_bitset_clear_and_free(
898 : CCC_Flat_bitset *const bitset, CCC_Allocator const *const allocator
899 : ) {
900 11 : if (!bitset || !allocator) {
901 2 : return CCC_RESULT_ARGUMENT_ERROR;
902 : }
903 9 : if (!allocator->allocate) {
904 5 : return CCC_RESULT_NO_ALLOCATION_FUNCTION;
905 : }
906 4 : if (bitset->blocks) {
907 9 : (void)allocator->allocate((CCC_Allocator_arguments){
908 3 : .input = bitset->blocks,
909 : .bytes = 0,
910 3 : .context = allocator->context,
911 : });
912 3 : }
913 4 : bitset->count = 0;
914 4 : bitset->capacity = 0;
915 4 : bitset->blocks = NULL;
916 4 : return CCC_RESULT_OK;
917 11 : }
918 :
919 : CCC_Result
920 49 : CCC_flat_bitset_reserve(
921 : CCC_Flat_bitset *const bitset,
922 : size_t const to_add,
923 : CCC_Allocator const *const allocator
924 : ) {
925 49 : if (!bitset || !allocator || !allocator->allocate || !to_add) {
926 3 : return CCC_RESULT_ARGUMENT_ERROR;
927 : }
928 46 : return maybe_resize(bitset, to_add, allocator);
929 49 : }
930 :
931 : CCC_Result
932 8 : CCC_flat_bitset_copy(
933 : CCC_Flat_bitset *const destination,
934 : CCC_Flat_bitset const *const source,
935 : CCC_Allocator const *const allocator
936 : ) {
937 8 : if (!destination || !source || !allocator
938 6 : || (destination->capacity < source->capacity && !allocator->allocate)) {
939 3 : return CCC_RESULT_ARGUMENT_ERROR;
940 : }
941 5 : if (!source->capacity) {
942 1 : destination->count = 0;
943 1 : return CCC_RESULT_OK;
944 : }
945 4 : if (destination->capacity < source->capacity) {
946 6 : Bit_block *const new_data
947 12 : = allocator->allocate((CCC_Allocator_arguments){
948 3 : .input = destination->blocks,
949 3 : .bytes = block_count(source->capacity) * SIZEOF_BLOCK,
950 3 : .context = allocator->context,
951 : });
952 3 : if (!new_data) {
953 1 : return CCC_RESULT_ALLOCATOR_ERROR;
954 : }
955 2 : destination->blocks = new_data;
956 2 : destination->capacity = source->capacity;
957 3 : }
958 3 : if (!source->blocks || !destination->blocks) {
959 1 : return CCC_RESULT_ARGUMENT_ERROR;
960 : }
961 2 : destination->count = source->count;
962 2 : (void)memcpy(
963 2 : destination->blocks,
964 2 : source->blocks,
965 2 : block_count(source->capacity) * SIZEOF_BLOCK
966 : );
967 2 : fix_end(destination);
968 2 : return CCC_RESULT_OK;
969 8 : }
970 :
971 : void *
972 2 : CCC_flat_bitset_data(CCC_Flat_bitset const *const bitset) {
973 2 : if (!bitset) {
974 1 : return NULL;
975 : }
976 1 : return bitset->blocks;
977 2 : }
978 :
979 : CCC_Tribool
980 7 : CCC_flat_bitset_is_equal(
981 : CCC_Flat_bitset const *const left, CCC_Flat_bitset const *const right
982 : ) {
983 7 : if (!left || !right) {
984 2 : return CCC_TRIBOOL_ERROR;
985 : }
986 5 : if (left->count != right->count) {
987 1 : return CCC_FALSE;
988 : }
989 4 : if (!left->count) {
990 1 : return CCC_TRUE;
991 : }
992 6 : return memcmp(
993 3 : left->blocks,
994 3 : right->blocks,
995 3 : block_count(left->count) * SIZEOF_BLOCK
996 : )
997 3 : == 0;
998 7 : }
999 :
1000 : /*========================= Private Interface =========================*/
1001 :
1002 : CCC_Result
1003 46 : CCC_private_flat_bitset_reserve(
1004 : struct CCC_Flat_bitset *const bitset,
1005 : size_t const to_add,
1006 : CCC_Allocator const *const allocator
1007 : ) {
1008 46 : return CCC_flat_bitset_reserve(bitset, to_add, allocator);
1009 : }
1010 :
1011 : CCC_Tribool
1012 2508 : CCC_private_flat_bitset_set(
1013 : struct CCC_Flat_bitset *const bitset,
1014 : size_t const index,
1015 : CCC_Tribool const bit
1016 : ) {
1017 2508 : return CCC_flat_bitset_set(bitset, index, bit);
1018 : }
1019 :
1020 : /*======================= Static Helpers ==============================*/
1021 :
1022 : /** Assumes set size is greater than or equal to subset size. */
1023 : static inline CCC_Tribool
1024 9 : is_subset_of(
1025 : struct CCC_Flat_bitset const *const subset,
1026 : struct CCC_Flat_bitset const *const set
1027 : ) {
1028 9 : assert(set->count >= subset->count);
1029 69 : for (Block_count i = 0, end = block_count(subset->count); i < end; ++i) {
1030 : /* Invariant: the last N unused bits in a set zero so this works. */
1031 60 : if (!is_mask_match(set->blocks[i], subset->blocks[i])) {
1032 4 : return CCC_FALSE;
1033 : }
1034 56 : }
1035 5 : return CCC_TRUE;
1036 9 : }
1037 :
1038 : static CCC_Result
1039 1743 : maybe_resize(
1040 : struct CCC_Flat_bitset *const bitset,
1041 : size_t const to_add,
1042 : CCC_Allocator const *const allocator
1043 : ) {
1044 1743 : size_t bits_needed = bitset->count + to_add;
1045 1743 : if (bits_needed <= bitset->capacity) {
1046 1692 : return CCC_RESULT_OK;
1047 : }
1048 51 : if (!allocator->allocate) {
1049 3 : return CCC_RESULT_NO_ALLOCATION_FUNCTION;
1050 : }
1051 48 : if (to_add == 1) {
1052 2 : bits_needed <<= 1;
1053 2 : }
1054 : static_assert(
1055 : (BLOCK_BITS & (BLOCK_BITS - 1)) == 0,
1056 : "rounding trick only works for powers of 2"
1057 : );
1058 96 : size_t const new_capacity
1059 48 : = (bits_needed + (BLOCK_BITS - 1)) & ~((size_t)BLOCK_BITS - 1);
1060 48 : if (new_capacity <= bitset->capacity) {
1061 1 : return CCC_RESULT_ARGUMENT_ERROR;
1062 : }
1063 94 : size_t const new_bytes
1064 47 : = block_count(new_capacity - bitset->count) * SIZEOF_BLOCK;
1065 94 : size_t const old_bytes
1066 47 : = bitset->count ? block_count(bitset->count) * SIZEOF_BLOCK : 0;
1067 188 : Bit_block *const new_data = allocator->allocate((CCC_Allocator_arguments){
1068 47 : .input = bitset->blocks,
1069 47 : .bytes = block_count(new_capacity) * SIZEOF_BLOCK,
1070 47 : .context = allocator->context,
1071 : });
1072 47 : if (!new_data) {
1073 1 : return CCC_RESULT_ALLOCATOR_ERROR;
1074 : }
1075 46 : (void)memset((char *)new_data + old_bytes, 0, new_bytes);
1076 46 : bitset->capacity = new_capacity;
1077 46 : bitset->blocks = new_data;
1078 46 : return CCC_RESULT_OK;
1079 1743 : }
1080 :
1081 : /** A trailing bit in a range is the first bit set to the specified boolean
1082 : value in the provided range. The input i gives the starting bit of the search,
1083 : meaning a bit within a block that is the inclusive start of the range. The count
1084 : gives us the end of the search for an overall range of `[i, i + count)`. This
1085 : means if the search range is greater than a single block we will iterate in
1086 : ascending order through our blocks and from least to most significant bit within
1087 : each block. */
1088 : static CCC_Count
1089 3073 : first_trailing_bit_range(
1090 : struct CCC_Flat_bitset const *const bitset,
1091 : size_t const i,
1092 : size_t const count,
1093 : CCC_Tribool const is_one
1094 : ) {
1095 3073 : size_t const exclusive_end = i + count;
1096 3073 : if (!bitset || !count || i >= bitset->count || exclusive_end < i
1097 3072 : || exclusive_end > bitset->count) {
1098 1 : return (CCC_Count){.error = CCC_RESULT_ARGUMENT_ERROR};
1099 : }
1100 3072 : Block_count start_block = block_count_index(i);
1101 3072 : Bit_count const start_bit = bit_count_index(i);
1102 3072 : Bit_block first_block_mask = leading_ones_mask(BLOCK_BITS - start_bit);
1103 3072 : if (start_bit + count < BLOCK_BITS) {
1104 65 : first_block_mask &= trailing_ones_mask((Bit_count)(start_bit + count));
1105 65 : }
1106 6144 : Bit_count trailing_zeros = count_trailing_zeros(
1107 3072 : first_block_mask
1108 3072 : & (is_one ? bitset->blocks[start_block] : ~bitset->blocks[start_block])
1109 : );
1110 3072 : if (trailing_zeros != BLOCK_BITS) {
1111 2112 : return (CCC_Count){
1112 1056 : .count = (start_block * BLOCK_BITS) + trailing_zeros,
1113 : };
1114 : }
1115 2016 : Block_count const end_block = block_count_index(exclusive_end - 1);
1116 2016 : if (end_block == start_block) {
1117 66 : return (CCC_Count){.error = CCC_RESULT_FAIL};
1118 : }
1119 15364 : while (++start_block < end_block) {
1120 14338 : trailing_zeros = count_trailing_zeros(
1121 14338 : is_one ? bitset->blocks[start_block] : ~bitset->blocks[start_block]
1122 : );
1123 14338 : if (trailing_zeros != BLOCK_BITS) {
1124 1848 : return (CCC_Count){
1125 924 : .count = (start_block * BLOCK_BITS) + trailing_zeros,
1126 : };
1127 : }
1128 : }
1129 2052 : Bit_block const last_block_mask
1130 1026 : = trailing_ones_mask(bit_count_index(exclusive_end - 1) + 1);
1131 1026 : trailing_zeros = count_trailing_zeros(
1132 1026 : last_block_mask
1133 1026 : & (is_one ? bitset->blocks[end_block] : ~bitset->blocks[end_block])
1134 : );
1135 1026 : if (trailing_zeros != BLOCK_BITS) {
1136 134 : return (CCC_Count){
1137 67 : .count = (end_block * BLOCK_BITS) + trailing_zeros,
1138 : };
1139 : }
1140 959 : return (CCC_Count){.error = CCC_RESULT_FAIL};
1141 3073 : }
1142 :
1143 : /** Finds the starting index of a sequence of 1's or 0's of the num_bits size in
1144 : linear time. The algorithm aims to efficiently skip as many bits as possible
1145 : while searching for the desired group. This avoids both an O(N^2) runtime and
1146 : the use of any unnecessary modulo or division operations in a hot loop. */
1147 : static CCC_Count
1148 17225 : first_trailing_bits_range( /* NOLINT (*cognitive-complexity) */
1149 : struct CCC_Flat_bitset const *const bitset,
1150 : size_t const i,
1151 : size_t const count,
1152 : size_t const num_bits,
1153 : CCC_Tribool const ones
1154 : ) {
1155 17225 : size_t const exclusive_end = i + count;
1156 17225 : if (!bitset || !count || !num_bits || i >= bitset->count || num_bits > count
1157 17218 : || exclusive_end < i || exclusive_end > bitset->count) {
1158 209 : return (CCC_Count){.error = CCC_RESULT_ARGUMENT_ERROR};
1159 : }
1160 17016 : size_t bit_count = 0;
1161 17016 : size_t window_start = i;
1162 17016 : Block_count block_index = block_count_index(i);
1163 17016 : size_t window_end = (block_index * BLOCK_BITS) + BLOCK_BITS;
1164 17016 : Bit_count bit_index = bit_count_index(i);
1165 34032 : Bit_block bits
1166 17016 : = ones ? bitset->blocks[block_index] : ~bitset->blocks[block_index];
1167 17016 : bits &= leading_ones_mask(BLOCK_BITS - bit_index);
1168 125970 : for (;;) {
1169 125970 : if (window_end > exclusive_end) {
1170 6317 : bits &= trailing_ones_mask(bit_count_index(exclusive_end - 1) + 1);
1171 6317 : }
1172 125970 : if (!bits) {
1173 97187 : window_start = (block_index + 1) * BLOCK_BITS;
1174 97187 : bit_count = 0;
1175 97187 : } else {
1176 28783 : size_t bits_remain = num_bits - bit_count;
1177 : /* I would rather check for a prefix that could be completing in
1178 : this block here than check for a failure and reset to number of
1179 : bits on every iteration of the next loop. Think of this like loop
1180 : unrolling while also making next block simpler. */
1181 28783 : if (bits_remain <= BLOCK_BITS && bits_remain < num_bits) {
1182 10068 : assert(bit_index < BLOCK_BITS && "shifts are valid for block");
1183 10068 : Bit_block const shifted_bits = bits >> bit_index;
1184 10068 : if (is_mask_match(
1185 10068 : shifted_bits, trailing_ones_mask((Bit_count)bits_remain)
1186 : )) {
1187 6052 : return (CCC_Count){.count = window_start};
1188 : }
1189 4016 : bits_remain = num_bits;
1190 4016 : bit_index += count_trailing_zeros(~shifted_bits);
1191 4016 : bit_count = 0;
1192 4016 : window_start = (block_index * BLOCK_BITS) + bit_index;
1193 10068 : }
1194 22731 : if (bits_remain <= BLOCK_BITS) {
1195 10269 : assert(bit_index < BLOCK_BITS && "shifts are valid for block");
1196 10269 : Bit_block shifted_bits = bits >> bit_index;
1197 20538 : Bit_block const bits_remain_mask
1198 10269 : = trailing_ones_mask((Bit_count)bits_remain);
1199 : /* The loop continues only while our block is numerically
1200 : greater than the mask. Because unsigned integers are
1201 : represented in base 2 we get two automatic early exits here.
1202 : - If the block is missing a high-order bit in the
1203 : required mask, it is numerically smaller than the mask
1204 : and cannot match with further shifting.
1205 : - If all high bits match but some lower required bits are
1206 : zero, the block is numerically smaller than the mask
1207 : and cannot match with further shifting.
1208 : If the block has high order bits not in the mask it is
1209 : greater than the mask and we continue checking, which is
1210 : correct. This strategy optimizes out some useless shifts. */
1211 54884 : while (shifted_bits >= bits_remain_mask) {
1212 47162 : if (is_mask_match(shifted_bits, bits_remain_mask)) {
1213 5094 : return (CCC_Count){
1214 2547 : .count = (block_index * BLOCK_BITS) + bit_index,
1215 : };
1216 : }
1217 44615 : ++bit_index;
1218 44615 : shifted_bits >>= 1;
1219 : }
1220 7722 : bit_count = 0;
1221 10269 : }
1222 : /* 2 cases covered: the ones remaining are greater than this block
1223 : could hold or we did not find a match by the masking we just did.
1224 : In either case we need the maximum contiguous ones that run all
1225 : the way to the MSB. The best we could have is a full block of
1226 : 1's. Otherwise we need to find where to start our new search for
1227 : contiguous 1's. This could be the next block if there are not 1's
1228 : that continue to MSB. */
1229 20184 : Bit_count const leading_ones = count_leading_zeros(~bits);
1230 20184 : bit_count += leading_ones;
1231 20184 : if (leading_ones < BLOCK_BITS) {
1232 : window_start
1233 15658 : = (block_index * BLOCK_BITS) + (BLOCK_BITS - leading_ones);
1234 15658 : }
1235 28783 : }
1236 117371 : if (window_start + num_bits > exclusive_end) {
1237 8417 : return (CCC_Count){.error = CCC_RESULT_FAIL};
1238 : }
1239 108954 : bit_index = 0;
1240 108954 : ++block_index;
1241 108954 : window_end += BLOCK_BITS;
1242 0 : assert(
1243 108954 : block_index < block_count_index(bitset->capacity)
1244 108954 : && "only load bits within block array capacity"
1245 : );
1246 : bits
1247 108954 : = ones ? bitset->blocks[block_index] : ~bitset->blocks[block_index];
1248 : }
1249 17225 : }
1250 :
1251 : /** A leading bit is the first bit in the range to be set to the indicated value
1252 : within a block starting the search from the Most Significant Bit of each block.
1253 : This means that if the range is larger than a single block we iterate in
1254 : descending order through the set of blocks starting at `i + count
1255 : - 1` for the range of `[i, i + count)`. The search within a given block
1256 : proceeds from Most Significant Bit toward Least Significant Bit. */
1257 : static CCC_Count
1258 3088 : first_leading_bit_range(
1259 : struct CCC_Flat_bitset const *const bitset,
1260 : size_t const start_index,
1261 : size_t const count,
1262 : CCC_Tribool const is_one
1263 : ) {
1264 3088 : if (!bitset || !count || start_index >= bitset->count
1265 3088 : || start_index + count <= start_index
1266 3088 : || start_index + count > bitset->count) {
1267 1022 : return (CCC_Count){.error = CCC_RESULT_ARGUMENT_ERROR};
1268 : }
1269 2066 : size_t const window_start = start_index + count - 1;
1270 2066 : Bit_count const start_bit = bit_count_index(window_start);
1271 2066 : Bit_count const end_bit = bit_count_index(start_index);
1272 2066 : Block_count start_block = block_count_index(window_start);
1273 2066 : Bit_block first_block_mask = trailing_ones_mask(start_bit + 1);
1274 2066 : if (start_index + count < BLOCK_BITS) {
1275 76 : first_block_mask &= leading_ones_mask(BLOCK_BITS - end_bit);
1276 76 : }
1277 4132 : Bit_count leading_zeros = count_leading_zeros(
1278 2066 : first_block_mask
1279 2066 : & (is_one ? bitset->blocks[start_block] : ~bitset->blocks[start_block])
1280 : );
1281 2066 : if (leading_zeros != BLOCK_BITS) {
1282 2132 : return (CCC_Count){
1283 1066 : .count = (start_block * BLOCK_BITS)
1284 1066 : + (Block_count)(BLOCK_BITS - leading_zeros - 1),
1285 : };
1286 : }
1287 1000 : Block_count const end_block = block_count_index(start_index);
1288 1000 : if (start_block == end_block) {
1289 4 : return (CCC_Count){.error = CCC_RESULT_FAIL};
1290 : }
1291 7774 : while (--start_block > end_block) {
1292 7702 : leading_zeros = count_leading_zeros(
1293 7702 : is_one ? bitset->blocks[start_block] : ~bitset->blocks[start_block]
1294 : );
1295 7702 : if (leading_zeros != BLOCK_BITS) {
1296 1848 : return (CCC_Count){
1297 924 : .count = (start_block * BLOCK_BITS)
1298 924 : + (Block_count)(BLOCK_BITS - leading_zeros - 1),
1299 : };
1300 : }
1301 : }
1302 72 : Bit_block const last_block_on = leading_ones_mask(BLOCK_BITS - end_bit);
1303 72 : leading_zeros = count_leading_zeros(
1304 72 : last_block_on
1305 72 : & (is_one ? bitset->blocks[end_block] : ~bitset->blocks[end_block])
1306 : );
1307 72 : if (leading_zeros != BLOCK_BITS) {
1308 138 : return (CCC_Count){
1309 69 : .count = (end_block * BLOCK_BITS)
1310 69 : + (Block_count)(BLOCK_BITS - leading_zeros - 1),
1311 : };
1312 : }
1313 3 : return (CCC_Count){.error = CCC_RESULT_FAIL};
1314 3088 : }
1315 :
1316 : /** Iterating backward from MSB to LSB requires more care to avoid unsigned
1317 : integer wrapping. Therefore, this code is not identical to the trailing version
1318 : due to a few more branches. This function previously used signed types to avoid
1319 : this branching but that required making new signed types, copious casting, and
1320 : verification of input to be within ptrdiff_t limits. For consistency and
1321 : portability I think committing to unsigned is better for this function. */
1322 : static CCC_Count
1323 17189 : first_leading_bits_range( /* NOLINT (*cognitive-complexity) */
1324 : struct CCC_Flat_bitset const *const bitset,
1325 : size_t const index,
1326 : size_t const range_count,
1327 : size_t const bits_required,
1328 : CCC_Tribool const ones
1329 : ) {
1330 17189 : if (!bitset || !range_count || !bits_required || index >= bitset->count
1331 17188 : || bits_required > range_count) {
1332 1 : return (CCC_Count){.error = CCC_RESULT_ARGUMENT_ERROR};
1333 : }
1334 17188 : size_t bit_count = 0;
1335 17188 : size_t window_start = (index + range_count - 1);
1336 17188 : Block_count block_index = block_count_index(window_start);
1337 17188 : Block_count window_inclusive_end = ((block_index * BLOCK_BITS));
1338 17188 : Bit_count bit_index = bit_count_index(window_start);
1339 34376 : Bit_block bits
1340 17188 : = ones ? bitset->blocks[block_index] : ~bitset->blocks[block_index];
1341 17188 : bits &= trailing_ones_mask(bit_index + 1);
1342 130737 : for (;;) {
1343 130737 : if (window_inclusive_end < index) {
1344 5545 : bits &= leading_ones_mask(BLOCK_BITS - bit_count_index(index));
1345 5545 : }
1346 130737 : if (!bits) {
1347 96355 : window_start = block_index ? (block_index * BLOCK_BITS) - 1 : 0;
1348 96355 : bit_count = 0;
1349 96355 : } else {
1350 34382 : size_t bits_remain = bits_required - bit_count;
1351 : /* I would rather check for a prefix that could be completing in
1352 : this block here than check for a failure and reset to number of
1353 : bits on every iteration of the next loop. Think of this like loop
1354 : unrolling while also making next block simpler. */
1355 34382 : if (bits_remain <= BLOCK_BITS && bits_remain < bits_required) {
1356 11987 : assert(bit_index < BLOCK_BITS && "shifts are valid for block");
1357 23974 : Bit_block const shifted_block = bits
1358 11987 : << (BLOCK_BITS - bit_index - 1);
1359 11987 : if (is_mask_match(
1360 11987 : shifted_block, leading_ones_mask((Bit_count)bits_remain)
1361 : )) {
1362 6038 : return (CCC_Count){.count = window_start};
1363 : }
1364 5949 : bits_remain = bits_required;
1365 11898 : Bit_count const leading_ones
1366 5949 : = count_leading_zeros(~shifted_block);
1367 5949 : assert(bit_index >= leading_ones && "index cannot underflow");
1368 5949 : bit_index -= leading_ones;
1369 5949 : window_start = (block_index * BLOCK_BITS) + bit_index;
1370 5949 : bit_count = 0;
1371 11987 : }
1372 28344 : if (bits_remain <= BLOCK_BITS) {
1373 13287 : assert(bit_index < BLOCK_BITS && "shifts are valid for block");
1374 13287 : Bit_block shifted_block = bits << (BLOCK_BITS - bit_index - 1);
1375 26574 : Bit_block const bits_remain_mask
1376 13287 : = leading_ones_mask((Bit_count)bits_remain);
1377 : /* Can't find a clever way to reduce shifts like trailing. */
1378 13287 : Bit_count const end_index = (Bit_count)(bits_remain - 1);
1379 103330 : while (bit_index >= end_index) {
1380 92586 : if (is_mask_match(shifted_block, bits_remain_mask)) {
1381 5086 : return (CCC_Count){
1382 2543 : .count = (block_index * BLOCK_BITS) + bit_index,
1383 : };
1384 : }
1385 90043 : --bit_index;
1386 90043 : shifted_block <<= 1;
1387 : }
1388 10744 : bit_count = 0;
1389 13287 : }
1390 25801 : Bit_count const trailing_ones = count_trailing_zeros(~bits);
1391 25801 : bit_count += trailing_ones;
1392 25801 : if (trailing_ones < BLOCK_BITS) {
1393 20402 : window_start = (block_index * BLOCK_BITS) + trailing_ones;
1394 20402 : if (window_start) {
1395 19876 : --window_start;
1396 19876 : }
1397 20402 : }
1398 34382 : }
1399 122156 : if (window_start < index + bits_required - 1) {
1400 8607 : return (CCC_Count){.error = CCC_RESULT_FAIL};
1401 : }
1402 113549 : bit_index = BLOCK_BITS - 1;
1403 113549 : --block_index;
1404 113549 : window_inclusive_end = window_inclusive_end >= BLOCK_BITS
1405 113549 : ? window_inclusive_end - BLOCK_BITS
1406 : : 0;
1407 0 : assert(
1408 113549 : block_index < block_count_index(bitset->capacity)
1409 113549 : && "current block within range while iterating toward LSB"
1410 : );
1411 : bits
1412 113549 : = ones ? bitset->blocks[block_index] : ~bitset->blocks[block_index];
1413 : }
1414 17189 : }
1415 :
1416 : /** Performs the any or none scan operation over the specified range. The only
1417 : difference between the operations is the return value. Specify the desired
1418 : Tribool value to return upon encountering an on bit. For any this is CCC_TRUE.
1419 : For none this is CCC_FALSE. Saves writing two identical fns. */
1420 : static CCC_Tribool
1421 2571 : any_or_none_range(
1422 : struct CCC_Flat_bitset const *const bitset,
1423 : size_t const i,
1424 : size_t const count,
1425 : CCC_Tribool const any_or_none
1426 : ) {
1427 2571 : size_t const range_end = i + count;
1428 2571 : if (!bitset || !count || i >= bitset->count || range_end < i
1429 2570 : || range_end > bitset->count || any_or_none < CCC_FALSE
1430 2570 : || any_or_none > CCC_TRUE) {
1431 1 : return CCC_TRIBOOL_ERROR;
1432 : }
1433 2570 : Block_count start_block = block_count_index(i);
1434 2570 : Bit_count const start_bit = bit_count_index(i);
1435 2570 : Bit_block first_block_mask = leading_ones_mask(BLOCK_BITS - start_bit);
1436 2570 : if (start_bit + count < BLOCK_BITS) {
1437 18 : first_block_mask &= trailing_ones_mask((Bit_count)(start_bit + count));
1438 18 : }
1439 2570 : if (first_block_mask & bitset->blocks[start_block]) {
1440 166 : return any_or_none;
1441 : }
1442 2404 : Block_count const end_block = block_count_index(range_end - 1);
1443 2404 : if (end_block == start_block) {
1444 18 : return !any_or_none;
1445 : }
1446 : /* If this is the any check we might get lucky by checking the last
1447 : block before looping over everything. */
1448 4772 : Bit_block const last_block_mask
1449 2386 : = trailing_ones_mask(bit_count_index(range_end - 1) + 1);
1450 2386 : if (last_block_mask & bitset->blocks[end_block]) {
1451 226 : return any_or_none;
1452 : }
1453 20864 : for (++start_block; start_block < end_block; ++start_block) {
1454 19600 : if (bitset->blocks[start_block] & BLOCK_ON) {
1455 896 : return any_or_none;
1456 : }
1457 18704 : }
1458 1264 : return !any_or_none;
1459 2571 : }
1460 :
1461 : /** Check for all on is slightly different from the any or none checks so we
1462 : need a painfully repetitive function. */
1463 : static CCC_Tribool
1464 1037 : all_range(
1465 : struct CCC_Flat_bitset const *const bitset,
1466 : size_t const i,
1467 : size_t const count
1468 : ) {
1469 1037 : size_t const range_end = i + count;
1470 1037 : if (!bitset || !count || i >= bitset->count || range_end < i
1471 1036 : || range_end > bitset->count) {
1472 1 : return CCC_TRIBOOL_ERROR;
1473 : }
1474 1036 : Block_count start_block = block_count_index(i);
1475 1036 : Bit_count const start_bit = bit_count_index(i);
1476 1036 : Bit_block first_block_mask = leading_ones_mask(BLOCK_BITS - start_bit);
1477 1036 : if (start_bit + count < BLOCK_BITS) {
1478 5 : first_block_mask &= trailing_ones_mask((Bit_count)(start_bit + count));
1479 5 : }
1480 1036 : if ((first_block_mask & bitset->blocks[start_block]) != first_block_mask) {
1481 771 : return CCC_FALSE;
1482 : }
1483 265 : Block_count const end_block = block_count_index(range_end - 1);
1484 265 : if (end_block == start_block) {
1485 4 : return CCC_TRUE;
1486 : }
1487 2079 : while (++start_block < end_block) {
1488 1819 : if (bitset->blocks[start_block] != BLOCK_ON) {
1489 1 : return CCC_FALSE;
1490 : }
1491 : }
1492 520 : Bit_block const last_block_mask
1493 260 : = trailing_ones_mask(bit_count_index(range_end - 1) + 1);
1494 260 : return is_mask_match(bitset->blocks[end_block], last_block_mask);
1495 1037 : }
1496 :
1497 : /** Given the 0 based index from `[0, count of used bits in set)` returns a
1498 : reference to the block that such a bit belongs to. This block reference will
1499 : point to some block at index [0, count of blocks used in the set). */
1500 : static inline Bit_block *
1501 40900 : block_at(
1502 : struct CCC_Flat_bitset const *const bitset, size_t const flat_bitset_index
1503 : ) {
1504 40900 : return &bitset->blocks[block_count_index(flat_bitset_index)];
1505 : }
1506 :
1507 : /** Sets all bits in bulk to value b and fixes the end block to ensure all bits
1508 : in the final block that are not in use are zeroed out. */
1509 : static inline void
1510 36 : set_all(struct CCC_Flat_bitset *const bitset, CCC_Tribool const b) {
1511 36 : int const v = b ? ~0 : 0;
1512 36 : (void)memset(bitset->blocks, v, block_count(bitset->count) * SIZEOF_BLOCK);
1513 36 : fix_end(bitset);
1514 36 : }
1515 :
1516 : /** Given the appropriate block in which bit_i resides, sets the bits position
1517 : to 0 or 1 as specified by the CCC_Tribool argument b.
1518 :
1519 : Assumes block has been retrieved correctly in range [0, count of blocks in set)
1520 : and that bit_i is in range [0, count of active bits in set). */
1521 : static inline void
1522 15412 : set(Bit_block *const block,
1523 : size_t const flat_bitset_index,
1524 : CCC_Tribool const b) {
1525 15412 : if (b) {
1526 8986 : *block |= on(flat_bitset_index);
1527 8986 : } else {
1528 6426 : *block &= ~on(flat_bitset_index);
1529 : }
1530 15412 : }
1531 :
1532 : /** Given the bit set and the set index--set index is allowed to be greater than
1533 : the size of one block--returns the status of the bit at that index. */
1534 : static inline CCC_Tribool
1535 20136 : status(Bit_block const *const bitset, size_t const flat_bitset_index) {
1536 : /* Be careful. The & op does not promise to evaluate to 1 or 0. We often
1537 : just use it where that conversion takes place implicitly for us. */
1538 20136 : return (*bitset & on(flat_bitset_index)) != 0;
1539 : }
1540 :
1541 : /** Given the true bit index in the bit set, expected to be in the range
1542 : [0, count of active bits in set), returns a Bit_block mask with only this bit
1543 : on in block to which it belongs. This mask guarantees to have a bit on within
1544 : a bit block at index [0, BIT_BLOCK_BITS - 1). */
1545 : static inline Bit_block
1546 35552 : on(size_t flat_bitset_index) {
1547 35552 : return (Bit_block)1 << bit_count_index(flat_bitset_index);
1548 : }
1549 :
1550 : /** Clears unused bits in the last block according to count. Sets the last block
1551 : to have only the used bits set to their given values and all bits after to zero.
1552 : This is used as a safety mechanism throughout the code after complex operations
1553 : on bit blocks to ensure any side effects on unused bits are deleted. */
1554 : static inline void
1555 19092 : fix_end(struct CCC_Flat_bitset *const bitset) {
1556 19092 : bitset->count
1557 19072 : ? (*block_at(bitset, bitset->count - 1)
1558 38144 : &= trailing_ones_mask(bit_count_index(bitset->count - 1) + 1))
1559 20 : : (bitset->blocks[0] = 0);
1560 19092 : }
1561 :
1562 : /** Returns the 0-based index of the block in the block array allocation to
1563 : which the given index belongs. Assumes the given index is somewhere between [0,
1564 : count of bits set). The returned index then represents the block in which this
1565 : index resides which is in the range [0, block containing last in use bit). */
1566 : static inline Block_count
1567 360618 : block_count_index(size_t const flat_bitset_index) {
1568 : static_assert(
1569 : (typeof(flat_bitset_index))~((typeof(flat_bitset_index))0)
1570 : >= (typeof(flat_bitset_index))0,
1571 : "shifting to avoid division with power of 2 divisor is only "
1572 : "defined for unsigned types"
1573 : );
1574 360618 : return flat_bitset_index >> BLOCK_BITS_LOG2;
1575 : }
1576 :
1577 : /** Returns the 0-based index within a block to which the given index belongs.
1578 : This index will always be between [0, BIT_BLOCK_BITS - 1). */
1579 : static inline Bit_count
1580 161186 : bit_count_index(size_t const flat_bitset_index) {
1581 161186 : return flat_bitset_index & (BLOCK_BITS - 1);
1582 : }
1583 :
1584 : /** Returns the number of blocks required to store the given bits. Assumes bits
1585 : is non-zero. For any bits > 1 the block count is always less than bits.*/
1586 : static inline Block_count
1587 9459 : block_count(size_t const bit_count) {
1588 : static_assert(
1589 : (typeof(bit_count))~((typeof(bit_count))0) >= (typeof(bit_count))0,
1590 : "shifting to avoid division with power of 2 divisor is only "
1591 : "defined for unsigned types"
1592 : );
1593 9459 : assert(bit_count && "calculating block count for non-empty bitset");
1594 9459 : return (bit_count + (BLOCK_BITS - 1)) >> BLOCK_BITS_LOG2;
1595 : }
1596 :
1597 : /** Returns min of size_t arguments. Beware of conversions. */
1598 : static inline size_t
1599 6 : size_t_min(size_t const a, size_t const b) {
1600 6 : return a < b ? a : b;
1601 : }
1602 :
1603 : /** Returns true if the on bit mask is present in the block. All one bits in the
1604 : mask must be found at the same positions in the block being queried. */
1605 : static inline CCC_Tribool
1606 162123 : is_mask_match(Bit_block const block, Bit_block const on_mask) {
1607 162123 : return (block & on_mask) == on_mask;
1608 : }
1609 :
1610 : /** Returns a mask of the specified count of trailing bits set to 1 (CCC_TRUE)
1611 : within a bit block. This is the same integer type that is stored in the bit set
1612 : integer block array. Trailing ones are ones starting from index 0, the Least
1613 : Significant Bit, counting toward the Most Significant Bit of the block. A count
1614 : of zero will return 0. A count equivalent to the block bit width will return a
1615 : mask with all bits set to 1. */
1616 : static inline Bit_block
1617 92434 : trailing_ones_mask(Bit_count const ones_count) {
1618 92434 : assert(ones_count <= BLOCK_BITS && "shift is well defined for mask");
1619 92434 : return ones_count ? BLOCK_ON >> (BLOCK_BITS - ones_count) : 0;
1620 : }
1621 :
1622 : /** Returns a mask of the specified count of leading bits set to 1 (CCC_TRUE)
1623 : within a bit block. This is the same integer type that is stored in the bit set
1624 : integer block array. Leading ones are ones starting from index BLOCK_BITS - 1,
1625 : the Most Significant Bit, counting toward 0, the Least Significant Bit, of the
1626 : block. A count of zero will return 0. A count equivalent to the block bit width
1627 : will return a mask with all bits set to 1. */
1628 : static inline Bit_block
1629 78939 : leading_ones_mask(Bit_count const ones_count) {
1630 78939 : assert(ones_count <= BLOCK_BITS && "shift is well defined for mask");
1631 78939 : return ones_count ? BLOCK_ON << (BLOCK_BITS - ones_count) : 0;
1632 : }
1633 :
1634 : /** The following asserts assure that whether portable or built in bit
1635 : operations are used in the coming section we are safe in our assumptions about
1636 : widths and counts. */
1637 : static_assert(
1638 : BLOCK_MSB < BLOCK_ON, "most significant bit is set for correct block width"
1639 : );
1640 : static_assert(
1641 : SIZEOF_BLOCK == sizeof(unsigned),
1642 : "builtins remain in sync with bitset block width"
1643 : );
1644 :
1645 : /** Much of the code relies on the assumption that iterating over blocks at
1646 : at a time is faster than using mathematical operations to conceptually iterate
1647 : over bits. This assumptions mostly comes from the use of these built-ins to
1648 : keep the processing time linear for range based queries, while avoiding division
1649 : and modulo operations. I should test to see the performance implications when
1650 : these built-ins are gone. However they are pretty ubiquitous these days. */
1651 :
1652 : /** Built-ins are common on Clang and GCC but we have portable fallback. */
1653 : #if defined(__has_builtin) && __has_builtin(__builtin_ctz) \
1654 : && __has_builtin(__builtin_clz) && __has_builtin(__builtin_popcount)
1655 :
1656 : /** Counts the on bits in a bit block. */
1657 : static inline Bit_count
1658 227961 : popcount(Bit_block const b) {
1659 : /* There are different pop counts for different integer widths. Be sure
1660 : to catch the use of the wrong one by mistake here at compile time. */
1661 : static_assert(
1662 : __builtin_popcount((Bit_block)~0) <= U8_BLOCK_MAX,
1663 : "builtins return counts that are valid for smaller width types we use"
1664 : );
1665 227961 : return (Bit_count)__builtin_popcount(b);
1666 : }
1667 :
1668 : /** Counts the number of trailing zeros in a bit block starting from least
1669 : significant bit. */
1670 : static inline Bit_count
1671 48253 : count_trailing_zeros(Bit_block const b) {
1672 : static_assert(
1673 : __builtin_ctz(BLOCK_MSB) <= U8_BLOCK_MAX,
1674 : "builtins return counts that are valid for smaller width types we use"
1675 : );
1676 48253 : return b ? (Bit_count)__builtin_ctz(b) : BLOCK_BITS;
1677 : }
1678 :
1679 : /** Counts the leading zeros in a bit block starting from the most significant
1680 : bit. */
1681 : static inline Bit_count
1682 35973 : count_leading_zeros(Bit_block const b) {
1683 : static_assert(
1684 : __builtin_clz((Bit_block)1) <= U8_BLOCK_MAX,
1685 : "builtins return counts that are valid for smaller width types we use"
1686 : );
1687 35973 : return b ? (Bit_count)__builtin_clz(b) : BLOCK_BITS;
1688 : }
1689 :
1690 : #else /* !defined(__has_builtin) || !__has_builtin(__builtin_ctz) \
1691 : || !__has_builtin(__builtin_clz) || !__has_builtin(__builtin_popcount) */
1692 :
1693 : /** Counts the on bits in a bit block. */
1694 : static inline Bit_count
1695 : popcount(Bit_block b) {
1696 : Bit_count cnt = 0;
1697 : for (; b; cnt += ((b & 1U) != 0), b >>= 1U) {}
1698 : return cnt;
1699 : }
1700 :
1701 : /** Counts the number of trailing zeros in a bit block starting from least
1702 : significant bit. */
1703 : static inline Bit_count
1704 : count_trailing_zeros(Bit_block b) {
1705 : if (!b) {
1706 : return BIT_BLOCK_BITS;
1707 : }
1708 : Bit_count cnt = 0;
1709 : for (; (b & 1U) == 0; ++cnt, b >>= 1U) {}
1710 : return cnt;
1711 : }
1712 :
1713 : /** Counts the leading zeros in a bit block starting from the most significant
1714 : bit. */
1715 : static inline Bit_count
1716 : count_leading_zeros(Bit_block b) {
1717 : if (!b) {
1718 : return BIT_BLOCK_BITS;
1719 : }
1720 : Bit_count cnt = 0;
1721 : for (; (b & BIT_BLOCK_MSB) == 0; ++cnt, b <<= 1U) {}
1722 : return cnt;
1723 : }
1724 :
1725 : #endif /* defined(__has_builtin) && __has_builtin(__builtin_ctz) \
1726 : && __has_builtin(__builtin_clz) && __has_builtin(__builtin_popcount) */
|