Line data Source code
1 : /** Copyright 2025 Alexander G. Lopez
2 :
3 : Licensed under the Apache License, Version 2.0 (the "License");
4 : you may not use this file except in compliance with the License.
5 : You may obtain a copy of the License at
6 :
7 : http://www.apache.org/licenses/LICENSE-2.0
8 :
9 : Unless required by applicable law or agreed to in writing, software
10 : distributed under the License is distributed on an "AS IS" BASIS,
11 : WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 : See the License for the specific language governing permissions and
13 : limitations under the License.
14 :
15 : This file implements a Bit Set using blocks of platform defined integers
16 : for speed and efficiency. The implementation aims for constant and linear time
17 : operations at all times, specifically when implementing more complicated range
18 : based operations over the set.
19 :
20 : It is also important to avoid modulo and division operations whenever possible.
21 : This is why much of the code revolves around obtaining indices by processing
22 : entire blocks at a time, rather than using mathematical operations to
23 : conceptually iterate over individual bits.
24 :
25 : Finally, the code is able to unite most functions for finding zeros or ones
26 : into a single function that accepts true or false as an additional argument.
27 : This is because every search for a zero can be solved by bitwise inverting a
28 : block and searching for a 1 instead. This elimination of identical functions
29 : costs a single branch in the function and is worth it to avoid code duplication
30 : and bug doubling. */
31 : /** C23 provided headers. */
32 : #include <limits.h>
33 : #include <stddef.h>
34 : #include <stdint.h>
35 :
36 : /** CCC provided headers. */
37 : #include "ccc/bitset.h"
38 : #include "ccc/configuration.h"
39 : #include "ccc/private/private_bitset.h"
40 : #include "ccc/types.h"
41 :
42 : /*========================= Type Declarations ============================*/
43 :
44 : /** @internal The type representing the word size used for each block of the
45 : Bitset. This type may vary depending on the platform in order to maximize
46 : performance, so it is given a type. The implementation then should not concern
47 : itself with the word size.
48 :
49 : The implementation may assume that the block is a standard integer width on
50 : the given platform that is compatible with all basic arithmetic and bitwise
51 : operations. If SIMD is implemented then multiple blocks may be processed under
52 : this same assumption. */
53 : typedef typeof(*(struct CCC_Bitset){}.blocks) Bit_block;
54 :
55 : /** @internal Used frequently so call the builtin just once. */
56 : enum : size_t {
57 : /** @internal Bytes of a bit block to help with byte calculations. */
58 : SIZEOF_BLOCK = sizeof(Bit_block),
59 : };
60 :
61 : /** @internal Various constants to support bit block size bit ops. */
62 : enum : Bit_block {
63 : /** @internal A mask of a bit block with all bits on. */
64 : BIT_BLOCK_ON = ((Bit_block)~0),
65 : /** @internal The Most Significant Bit of a bit block turned on to 1. */
66 : BIT_BLOCK_MSB = (((Bit_block)1) << (((SIZEOF_BLOCK * CHAR_BIT)) - 1)),
67 : };
68 :
69 : /** @internal An index into the block array or count of bit blocks. The block
70 : array is constructed by the number of blocks required to support the current bit
71 : set capacity. Assume this index type has range [0, block count to support N
72 : bits].
73 :
74 : User input is given as a `size_t` so distinguish from that input with this type
75 : to make it clear to the reader the index refers to a block not the given bit
76 : index the user has provided. */
77 : typedef size_t Block_count;
78 :
79 : /** @internal A signed index into the block array. The block array is built by
80 : the number of blocks required to support the current bit set capacity. Assume
81 : this index type has range [-1, count of blocks needed to hold all bits in set].
82 : This makes reverse iteration problems easier.
83 :
84 : Most platforms will allow for this signed type to index any bit block that the
85 : unsigned equivalent can index into. However, one should always check that the
86 : unsigned value can safely be cast to signed.
87 :
88 : User input is given as a `size_t` so distinguish from that input with this type
89 : to make it clear to the reader the index refers to a block not the given bit
90 : index the user has provided. */
91 : typedef ptrdiff_t Block_signed_count;
92 :
93 : /** @internal An index within a block. A block is set to some number of bits
94 : as determined by the type used for each block. This type is intended to count
95 : bits in a block and therefore cannot count up to arbitrary indices. Assume its
96 : range is `[0, BIT_BLOCK_BITS]`, for ease of use and clarity.
97 :
98 : There are many types of indexing and counting that take place in a bit set so
99 : use this type specifically for counting bits in a block so the reader is clear
100 : on intent. */
101 : typedef uint8_t Bit_count;
102 :
103 : enum : Bit_count {
104 : /** @internal How many total bits that fit in a bit block. */
105 : BIT_BLOCK_BITS = (SIZEOF_BLOCK * CHAR_BIT),
106 : /** @internal Used for static assert clarity. */
107 : U8_BLOCK_MAX = UINT8_MAX,
108 : /** @internal Hand coded log2 of block bits to avoid division. */
109 : BIT_BLOCK_BITS_LOG2 = 5,
110 : };
111 : static_assert(
112 : (BIT_BLOCK_BITS & (BIT_BLOCK_BITS - 1)) == 0,
113 : "the number of bits in a block is always a power of two, "
114 : "helping avoid division and modulo operations when possible"
115 : );
116 : static_assert(
117 : BIT_BLOCK_BITS >> BIT_BLOCK_BITS_LOG2 == 1,
118 : "hand coded log2 of bitblock bits is always correct"
119 : );
120 : static_assert(
121 : (Bit_count) ~((Bit_count)0) >= (Bit_count)0, "Bit_count must be unsigned"
122 : );
123 : static_assert(UINT8_MAX >= BIT_BLOCK_BITS, "Bit_count counts all block bits.");
124 :
125 : /** @internal A signed index within a block. A block is set to some number
126 : of bits as determined by the type used for each block. This type is intended to
127 : count bits in a block and therefore cannot count up to arbitrary indices. Assume
128 : its range is `[-1, BIT_BLOCK_BITS]`, for ease of use and clarity.
129 :
130 : There are many types of indexing and counting that take place in a bit set so
131 : use this type specifically for counting bits in a block so the reader is clear
132 : on intent. It helps clean up algorithms for finding ranges of leading bits.
133 : It also a wider type to avoid warnings or dangers when taking the value of a
134 : `Bit_count` type. */
135 : typedef int16_t Bit_signed_count;
136 : static_assert(
137 : sizeof(Bit_signed_count) > sizeof(Bit_count),
138 : "Bit_signed_count x = (Bit_signed_count)x_bit_count; // is safe"
139 : );
140 : static_assert(
141 : (Bit_signed_count) ~((Bit_signed_count)0) < (Bit_signed_count)0,
142 : "Bit_signed_count must be signed"
143 : );
144 : static_assert(
145 : INT16_MAX >= BIT_BLOCK_BITS, "Bit_signed_count counts all block bits"
146 : );
147 :
148 : /** @internal A helper to allow for an efficient linear scan for groups of 0's
149 : or 1's in the set. */
150 : struct Group_count {
151 : /** A index [0, bit block bits] indicating the status of a search. */
152 : Bit_count index;
153 : /** The number of bits of same value found starting at block_start_i. */
154 : Bit_count count;
155 : };
156 :
157 : /** @internal A signed helper to allow for an efficient linear scan for groups
158 : of 0's or 1's in the set. Created to support -1 index return values for cleaner
159 : group scanning in reverse. */
160 : struct Group_signed_count {
161 : /** A index [-1, bit block bits] indicating the status of a search. */
162 : Bit_signed_count index;
163 : /** The number of bits of same value found starting at block_start_i. */
164 : Bit_signed_count count;
165 : };
166 :
167 : /*========================= Prototypes ============================*/
168 :
169 : static size_t block_count_index(size_t);
170 : static inline Bit_block *block_at(struct CCC_Bitset const *, size_t);
171 : static void set(Bit_block *, size_t, CCC_Tribool);
172 : static Bit_block on(size_t);
173 : static void fix_end(struct CCC_Bitset *);
174 : static CCC_Tribool status(Bit_block const *, size_t);
175 : static size_t block_count(size_t);
176 : static CCC_Tribool
177 : any_or_none_range(struct CCC_Bitset const *, size_t, size_t, CCC_Tribool);
178 : static CCC_Tribool all_range(struct CCC_Bitset const *, size_t, size_t);
179 : static CCC_Count first_trailing_bit_range(
180 : struct CCC_Bitset const *, size_t, size_t, CCC_Tribool
181 : );
182 : static CCC_Count
183 : first_leading_bit_range(struct CCC_Bitset const *, size_t, size_t, CCC_Tribool);
184 : static CCC_Count first_trailing_bits_range(
185 : struct CCC_Bitset const *, size_t, size_t, size_t, CCC_Tribool
186 : );
187 : static CCC_Count first_leading_bits_range(
188 : struct CCC_Bitset const *, size_t, size_t, size_t, CCC_Tribool
189 : );
190 : static struct Group_count max_trailing_ones(Bit_block, Bit_count, size_t);
191 : static struct Group_signed_count
192 : max_leading_ones(Bit_block, Bit_signed_count, size_t);
193 : static CCC_Result
194 : maybe_resize(struct CCC_Bitset *, size_t, CCC_Allocator const *);
195 : static size_t size_t_min(size_t, size_t);
196 : static void set_all(struct CCC_Bitset *, CCC_Tribool);
197 : static Bit_count bit_count_index(size_t);
198 : static CCC_Tribool
199 : is_subset_of(struct CCC_Bitset const *, struct CCC_Bitset const *);
200 : static Bit_count popcount(Bit_block);
201 : static Bit_count count_trailing_zeros(Bit_block);
202 : static Bit_count count_leading_zeros(Bit_block);
203 :
204 : /*======================= Public Interface ==============================*/
205 :
206 : CCC_Tribool
207 4 : CCC_bitset_is_proper_subset(
208 : CCC_Bitset const *const subset, CCC_Bitset const *const set
209 : ) {
210 4 : if (!set || !subset) {
211 2 : return CCC_TRIBOOL_ERROR;
212 : }
213 2 : if (set->count <= subset->count) {
214 1 : return CCC_FALSE;
215 : }
216 1 : return is_subset_of(subset, set);
217 4 : }
218 :
219 : CCC_Tribool
220 11 : CCC_bitset_is_subset(
221 : CCC_Bitset const *const subset, CCC_Bitset const *const set
222 : ) {
223 11 : if (!set || !subset) {
224 2 : return CCC_TRIBOOL_ERROR;
225 : }
226 9 : if (set->count < subset->count) {
227 1 : return CCC_FALSE;
228 : }
229 8 : return is_subset_of(subset, set);
230 11 : }
231 :
232 : CCC_Result
233 8 : CCC_bitset_or(CCC_Bitset *const destination, CCC_Bitset const *const source) {
234 8 : if (!destination || !source) {
235 2 : return CCC_RESULT_ARGUMENT_ERROR;
236 : }
237 6 : if (!destination->count || !source->count) {
238 4 : return CCC_RESULT_OK;
239 : }
240 4 : Block_count const end_block
241 2 : = block_count(size_t_min(destination->count, source->count));
242 26 : for (size_t b = 0; b < end_block; ++b) {
243 24 : destination->blocks[b] |= source->blocks[b];
244 24 : }
245 2 : fix_end(destination);
246 2 : return CCC_RESULT_OK;
247 8 : }
248 :
249 : CCC_Result
250 6 : CCC_bitset_xor(CCC_Bitset *const destination, CCC_Bitset const *const source) {
251 6 : if (!destination || !source) {
252 2 : return CCC_RESULT_ARGUMENT_ERROR;
253 : }
254 4 : if (!destination->count || !source->count) {
255 2 : return CCC_RESULT_OK;
256 : }
257 4 : Block_count const end_block
258 2 : = block_count(size_t_min(destination->count, source->count));
259 26 : for (Block_count b = 0; b < end_block; ++b) {
260 24 : destination->blocks[b] ^= source->blocks[b];
261 24 : }
262 2 : fix_end(destination);
263 2 : return CCC_RESULT_OK;
264 6 : }
265 :
266 : CCC_Result
267 6 : CCC_bitset_and(CCC_Bitset *destination, CCC_Bitset const *source) {
268 6 : if (!destination || !source) {
269 2 : return CCC_RESULT_ARGUMENT_ERROR;
270 : }
271 4 : if (!source->count) {
272 1 : set_all(destination, CCC_FALSE);
273 1 : return CCC_RESULT_OK;
274 : }
275 3 : if (!destination->count) {
276 1 : return CCC_RESULT_OK;
277 : }
278 4 : Block_count const end_block
279 2 : = block_count(size_t_min(destination->count, source->count));
280 26 : for (Block_count b = 0; b < end_block; ++b) {
281 24 : destination->blocks[b] &= source->blocks[b];
282 24 : }
283 2 : if (destination->count <= source->count) {
284 1 : return CCC_RESULT_OK;
285 : }
286 : /* The source widens to align with destination as integers would; same
287 : consequences. */
288 1 : Block_count const destination_blocks = block_count(destination->count);
289 1 : Block_count const remain = destination_blocks - end_block;
290 2 : (void)memset(
291 2 : destination->blocks + end_block, CCC_FALSE, remain * SIZEOF_BLOCK
292 : );
293 1 : fix_end(destination);
294 1 : return CCC_RESULT_OK;
295 6 : }
296 :
297 : CCC_Result
298 9 : CCC_bitset_shift_left(CCC_Bitset *const bitset, size_t const left_shifts) {
299 9 : if (!bitset) {
300 1 : return CCC_RESULT_ARGUMENT_ERROR;
301 : }
302 8 : if (!bitset->count || !left_shifts) {
303 1 : return CCC_RESULT_OK;
304 : }
305 7 : if (left_shifts >= bitset->count) {
306 1 : set_all(bitset, CCC_FALSE);
307 1 : return CCC_RESULT_OK;
308 : }
309 6 : Block_count const end = block_count_index(bitset->count - 1);
310 6 : Block_count const blocks = block_count_index(left_shifts);
311 6 : Bit_count const split = bit_count_index(left_shifts);
312 6 : if (!split) {
313 31 : for (Block_count shift = end - blocks + 1, write = end; shift--;
314 29 : --write) {
315 29 : bitset->blocks[write] = bitset->blocks[shift];
316 29 : }
317 2 : } else {
318 4 : Bit_count const remain = BIT_BLOCK_BITS - split;
319 32 : for (Block_count shift = end - blocks, write = end; shift > 0;
320 28 : --shift, --write) {
321 56 : bitset->blocks[write] = (bitset->blocks[shift] << split)
322 28 : | (bitset->blocks[shift - 1] >> remain);
323 28 : }
324 4 : bitset->blocks[blocks] = bitset->blocks[0] << split;
325 4 : }
326 : /* Zero fills in lower bits just as an integer shift would. */
327 26 : for (Block_count i = 0; i < blocks; ++i) {
328 20 : bitset->blocks[i] = 0;
329 20 : }
330 6 : fix_end(bitset);
331 6 : return CCC_RESULT_OK;
332 9 : }
333 :
334 : CCC_Result
335 9 : CCC_bitset_shift_right(CCC_Bitset *const bitset, size_t const right_shifts) {
336 9 : if (!bitset) {
337 1 : return CCC_RESULT_ARGUMENT_ERROR;
338 : }
339 8 : if (!bitset->count || !right_shifts) {
340 1 : return CCC_RESULT_OK;
341 : }
342 7 : if (right_shifts >= bitset->count) {
343 1 : set_all(bitset, CCC_FALSE);
344 1 : return CCC_RESULT_OK;
345 : }
346 6 : Block_count const end = block_count_index(bitset->count - 1);
347 6 : Block_count const blocks = block_count_index(right_shifts);
348 6 : Bit_count split = bit_count_index(right_shifts);
349 6 : if (!split) {
350 31 : for (Block_count shift = blocks, write = 0; shift < end + 1;
351 29 : ++shift, ++write) {
352 29 : bitset->blocks[write] = bitset->blocks[shift];
353 29 : }
354 2 : } else {
355 4 : Bit_count const remain = BIT_BLOCK_BITS - split;
356 32 : for (Block_count shift = blocks, write = 0; shift < end;
357 28 : ++shift, ++write) {
358 56 : bitset->blocks[write] = (bitset->blocks[shift + 1] << remain)
359 28 : | (bitset->blocks[shift] >> split);
360 28 : }
361 4 : bitset->blocks[end - blocks] = bitset->blocks[end] >> split;
362 4 : }
363 : /* This is safe for a few reasons:
364 : - If shifts equals count we set all to 0 and returned early.
365 : - If we only have one block i will be equal to end and we are done.
366 : - If end is the 0th block we will stop after 1. A meaningful shift
367 : occurred in the 0th block so zeroing would be a mistake.
368 : - All other cases ensure it is safe to decrease i (no underflow).
369 : This operation emulates the zeroing of high bits on a right shift and
370 : a bit set is considered unsigned so we don't sign bit fill. */
371 26 : for (Block_count i = end; i > end - blocks; --i) {
372 20 : bitset->blocks[i] = 0;
373 20 : }
374 6 : fix_end(bitset);
375 6 : return CCC_RESULT_OK;
376 9 : }
377 :
378 : CCC_Tribool
379 4144 : CCC_bitset_test(CCC_Bitset const *const bitset, size_t const i) {
380 4144 : if (!bitset) {
381 1 : return CCC_TRIBOOL_ERROR;
382 : }
383 4143 : if (i >= bitset->count) {
384 7 : return CCC_TRIBOOL_ERROR;
385 : }
386 4136 : return status(block_at(bitset, i), i);
387 4144 : }
388 :
389 : CCC_Tribool
390 11225 : CCC_bitset_set(CCC_Bitset *const bitset, size_t const i, CCC_Tribool const b) {
391 11225 : if (!bitset) {
392 1 : return CCC_TRIBOOL_ERROR;
393 : }
394 11224 : if (i >= bitset->count) {
395 2 : return CCC_TRIBOOL_ERROR;
396 : }
397 11222 : Bit_block *const block = block_at(bitset, i);
398 11222 : CCC_Tribool const was = status(block, i);
399 11222 : set(block, i, b);
400 11222 : return was;
401 11225 : }
402 :
403 : CCC_Result
404 34 : CCC_bitset_set_all(CCC_Bitset *const bitset, CCC_Tribool const b) {
405 34 : if (!bitset) {
406 1 : return CCC_RESULT_ARGUMENT_ERROR;
407 : }
408 33 : if (bitset->count) {
409 33 : set_all(bitset, b);
410 33 : }
411 33 : return CCC_RESULT_OK;
412 34 : }
413 :
414 : /** A naive implementation might just call set for every index between the
415 : start and start + count. However, calculating the block and index within each
416 : block for every call to set costs a division and a modulo operation. This also
417 : loads and stores a block multiple times just to set each bit within a block to
418 : the same value. We can avoid this by handling the first and last block with one
419 : operations and then handling everything in between with a bulk memset. */
420 : CCC_Result
421 10452 : CCC_bitset_set_range(
422 : CCC_Bitset *const bitset,
423 : size_t const range_start_index,
424 : size_t const range_bit_count,
425 : CCC_Tribool const b
426 : ) {
427 10452 : size_t const end_i = range_start_index + range_bit_count;
428 10452 : if (!bitset || !range_bit_count || range_start_index >= bitset->count
429 10452 : || end_i < range_start_index || end_i > bitset->count) {
430 1 : return CCC_RESULT_ARGUMENT_ERROR;
431 : }
432 10451 : Block_count start_block = block_count_index(range_start_index);
433 10451 : Bit_count const start_bit = bit_count_index(range_start_index);
434 10451 : Bit_block first_block_on = BIT_BLOCK_ON << start_bit;
435 10451 : if (start_bit + range_bit_count < BIT_BLOCK_BITS) {
436 1687 : first_block_on &= ~(BIT_BLOCK_ON << (start_bit + range_bit_count));
437 1687 : }
438 :
439 : /* Logic is uniform except for key lines to turn bits on or off. */
440 10451 : b ? (bitset->blocks[start_block] |= first_block_on)
441 4201 : : (bitset->blocks[start_block] &= ~first_block_on);
442 :
443 10451 : Block_count const end_block = block_count_index(end_i - 1);
444 10451 : if (end_block == start_block) {
445 1972 : fix_end(bitset);
446 1972 : return CCC_RESULT_OK;
447 : }
448 8479 : if (++start_block != end_block) {
449 5766 : int const v = b ? ~0 : 0;
450 11532 : (void)memset(
451 5766 : &bitset->blocks[start_block],
452 5766 : v,
453 5766 : (end_block - start_block) * SIZEOF_BLOCK
454 : );
455 5766 : }
456 8479 : Bit_count const end_bit = bit_count_index(end_i - 1);
457 16958 : Bit_block const last_block_on
458 8479 : = BIT_BLOCK_ON >> ((BIT_BLOCK_BITS - end_bit) - 1);
459 :
460 : /* Same as first block but we are careful about bits past our range. */
461 8479 : b ? (bitset->blocks[end_block] |= last_block_on)
462 3248 : : (bitset->blocks[end_block] &= ~last_block_on);
463 :
464 8479 : fix_end(bitset);
465 8479 : return CCC_RESULT_OK;
466 10452 : }
467 :
468 : CCC_Tribool
469 4 : CCC_bitset_reset(CCC_Bitset *const bitset, size_t const i) {
470 4 : if (!bitset) {
471 1 : return CCC_TRIBOOL_ERROR;
472 : }
473 3 : if (i >= bitset->count) {
474 1 : return CCC_TRIBOOL_ERROR;
475 : }
476 2 : Bit_block *const block = block_at(bitset, i);
477 2 : CCC_Tribool const was = status(block, i);
478 2 : *block &= ~on(i);
479 2 : fix_end(bitset);
480 2 : return was;
481 4 : }
482 :
483 : CCC_Result
484 11 : CCC_bitset_reset_all(CCC_Bitset *const bitset) {
485 11 : if (!bitset) {
486 1 : return CCC_RESULT_ARGUMENT_ERROR;
487 : }
488 10 : if (bitset->count) {
489 10 : (void)memset(
490 10 : bitset->blocks, CCC_FALSE, block_count(bitset->count) * SIZEOF_BLOCK
491 : );
492 10 : }
493 10 : return CCC_RESULT_OK;
494 11 : }
495 :
496 : /** Same concept as set range but easier. Handle first and last then set
497 : everything in between to false with memset. */
498 : CCC_Result
499 2050 : CCC_bitset_reset_range(
500 : CCC_Bitset *const bitset,
501 : size_t const range_start_index,
502 : size_t const range_bit_count
503 : ) {
504 2050 : size_t const end_i = range_start_index + range_bit_count;
505 2050 : if (!bitset || !range_bit_count || range_start_index >= bitset->count
506 2049 : || end_i < range_start_index || end_i > bitset->count) {
507 1 : return CCC_RESULT_ARGUMENT_ERROR;
508 : }
509 2049 : Block_count start_block = block_count_index(range_start_index);
510 2049 : Bit_count const start_bit = bit_count_index(range_start_index);
511 2049 : Bit_block first_block_on = BIT_BLOCK_ON << start_bit;
512 2049 : if (start_bit + range_bit_count < BIT_BLOCK_BITS) {
513 33 : first_block_on &= ~(BIT_BLOCK_ON << (start_bit + range_bit_count));
514 33 : }
515 2049 : bitset->blocks[start_block] &= ~first_block_on;
516 2049 : Block_count const end_block = block_count_index(end_i - 1);
517 2049 : if (end_block == start_block) {
518 66 : fix_end(bitset);
519 66 : return CCC_RESULT_OK;
520 : }
521 1983 : if (++start_block != end_block) {
522 1792 : (void)memset(
523 1792 : &bitset->blocks[start_block],
524 : CCC_FALSE,
525 1792 : (end_block - start_block) * SIZEOF_BLOCK
526 : );
527 1792 : }
528 1983 : Bit_count const end_bit = bit_count_index(end_i - 1);
529 3966 : Bit_block const last_block_on
530 1983 : = BIT_BLOCK_ON >> ((BIT_BLOCK_BITS - end_bit) - 1);
531 1983 : bitset->blocks[end_block] &= ~last_block_on;
532 1983 : fix_end(bitset);
533 1983 : return CCC_RESULT_OK;
534 2050 : }
535 :
536 : CCC_Tribool
537 4 : CCC_bitset_flip(CCC_Bitset *const bitset, size_t const i) {
538 4 : if (!bitset || i > bitset->count) {
539 2 : return CCC_TRIBOOL_ERROR;
540 : }
541 2 : Block_count const b_i = block_count_index(i);
542 2 : Bit_block *const block = &bitset->blocks[b_i];
543 2 : CCC_Tribool const was = status(block, i);
544 2 : *block ^= on(i);
545 2 : fix_end(bitset);
546 2 : return was;
547 4 : }
548 :
549 : CCC_Result
550 3 : CCC_bitset_flip_all(CCC_Bitset *const bitset) {
551 3 : if (!bitset) {
552 1 : return CCC_RESULT_ARGUMENT_ERROR;
553 : }
554 2 : if (!bitset->count) {
555 1 : return CCC_RESULT_OK;
556 : }
557 1 : Block_count const end = block_count(bitset->count);
558 2 : for (size_t i = 0; i < end; ++i) {
559 1 : bitset->blocks[i] = ~bitset->blocks[i];
560 1 : }
561 1 : fix_end(bitset);
562 1 : return CCC_RESULT_OK;
563 3 : }
564 :
565 : /** Maybe future SIMD vectorization could speed things up here because we use
566 : the same strat of handling first and last which just leaves a simpler bulk
567 : operation in the middle. But we don't benefit from memset here. */
568 : CCC_Result
569 2563 : CCC_bitset_flip_range(
570 : CCC_Bitset *const bitset,
571 : size_t const range_start_index,
572 : size_t const range_bit_count
573 : ) {
574 2563 : size_t const end_i = range_start_index + range_bit_count;
575 2563 : if (!bitset || !range_bit_count || range_start_index >= bitset->count
576 2562 : || end_i < range_start_index || end_i > bitset->count) {
577 3 : return CCC_RESULT_ARGUMENT_ERROR;
578 : }
579 2560 : Block_count start_block = block_count_index(range_start_index);
580 2560 : Bit_count const start_bit = bit_count_index(range_start_index);
581 2560 : Bit_block first_block_on = BIT_BLOCK_ON << start_bit;
582 2560 : if (start_bit + range_bit_count < BIT_BLOCK_BITS) {
583 62 : first_block_on &= ~(BIT_BLOCK_ON << (start_bit + range_bit_count));
584 62 : }
585 2560 : bitset->blocks[start_block] ^= first_block_on;
586 2560 : Block_count const end_block = block_count_index(end_i - 1);
587 2560 : if (end_block == start_block) {
588 128 : fix_end(bitset);
589 128 : return CCC_RESULT_OK;
590 : }
591 19456 : while (++start_block < end_block) {
592 17024 : bitset->blocks[start_block] = ~bitset->blocks[start_block];
593 : }
594 2432 : Bit_count const end_bit = bit_count_index(end_i - 1);
595 4864 : Bit_block const last_block_on
596 2432 : = BIT_BLOCK_ON >> ((BIT_BLOCK_BITS - end_bit) - 1);
597 2432 : bitset->blocks[end_block] ^= last_block_on;
598 2432 : fix_end(bitset);
599 2432 : return CCC_RESULT_OK;
600 2563 : }
601 :
602 : CCC_Count
603 76 : CCC_bitset_capacity(CCC_Bitset const *const bitset) {
604 76 : if (!bitset) {
605 1 : return (CCC_Count){.error = CCC_RESULT_ARGUMENT_ERROR};
606 : }
607 75 : return (CCC_Count){.count = bitset->capacity};
608 76 : }
609 :
610 : CCC_Count
611 2 : CCC_bitset_blocks_capacity(CCC_Bitset const *const bitset) {
612 2 : if (!bitset) {
613 1 : return (CCC_Count){.error = CCC_RESULT_ARGUMENT_ERROR};
614 : }
615 2 : return (CCC_Count){
616 1 : .count = block_count(bitset->capacity),
617 : };
618 2 : }
619 :
620 : CCC_Count
621 1679 : CCC_bitset_count(CCC_Bitset const *const bitset) {
622 1679 : if (!bitset) {
623 1 : return (CCC_Count){.error = CCC_RESULT_ARGUMENT_ERROR};
624 : }
625 1678 : return (CCC_Count){.count = bitset->count};
626 1679 : }
627 :
628 : CCC_Count
629 2 : CCC_bitset_blocks_count(CCC_Bitset const *const bitset) {
630 2 : if (!bitset) {
631 1 : return (CCC_Count){.error = CCC_RESULT_ARGUMENT_ERROR};
632 : }
633 2 : return (CCC_Count){
634 1 : .count = block_count(bitset->count),
635 : };
636 2 : }
637 :
638 : CCC_Tribool
639 2304 : CCC_bitset_is_empty(CCC_Bitset const *const bitset) {
640 2304 : if (!bitset) {
641 1 : return CCC_TRIBOOL_ERROR;
642 : }
643 2303 : return !bitset->count;
644 2304 : }
645 :
646 : CCC_Count
647 9297 : CCC_bitset_popcount(CCC_Bitset const *const bitset) {
648 9297 : if (!bitset) {
649 1 : return (CCC_Count){.error = CCC_RESULT_ARGUMENT_ERROR};
650 : }
651 9296 : if (!bitset->count) {
652 9 : return (CCC_Count){.count = 0};
653 : }
654 9287 : Block_count const end = block_count(bitset->count);
655 9287 : size_t count = 0;
656 157390 : for (Block_count i = 0; i < end; ++i) {
657 148103 : count += popcount(bitset->blocks[i]);
658 148103 : }
659 9287 : return (CCC_Count){.count = count};
660 9297 : }
661 :
662 : CCC_Count
663 9220 : CCC_bitset_popcount_range(
664 : CCC_Bitset const *const bitset,
665 : size_t const range_start_index,
666 : size_t const range_bit_count
667 : ) {
668 9220 : size_t const end_i = range_start_index + range_bit_count;
669 9220 : if (!bitset || !range_bit_count || range_start_index >= bitset->count
670 9218 : || end_i < range_start_index || end_i > bitset->count) {
671 2 : return (CCC_Count){.error = CCC_RESULT_ARGUMENT_ERROR};
672 : }
673 9218 : size_t popped = 0;
674 9218 : Block_count start_block = block_count_index(range_start_index);
675 9218 : Bit_count const start_bit = bit_count_index(range_start_index);
676 9218 : Bit_block first_block_on = BIT_BLOCK_ON << start_bit;
677 9218 : if (start_bit + range_bit_count < BIT_BLOCK_BITS) {
678 188 : first_block_on &= ~(BIT_BLOCK_ON << (start_bit + range_bit_count));
679 188 : }
680 9218 : popped += popcount(first_block_on & bitset->blocks[start_block]);
681 9218 : Block_count const end_block = block_count_index(end_i - 1);
682 9218 : if (end_block == start_block) {
683 388 : return (CCC_Count){.count = popped};
684 : }
685 70640 : while (++start_block < end_block) {
686 61810 : popped += popcount(bitset->blocks[start_block]);
687 : }
688 8830 : Bit_count const end_bit = bit_count_index(end_i - 1);
689 17660 : Bit_block const last_block_on
690 8830 : = BIT_BLOCK_ON >> ((BIT_BLOCK_BITS - end_bit) - 1);
691 8830 : popped += popcount(last_block_on & bitset->blocks[end_block]);
692 8830 : return (CCC_Count){.count = popped};
693 9220 : }
694 :
695 : CCC_Result
696 1700 : CCC_bitset_push_back(
697 : CCC_Bitset *const bitset,
698 : CCC_Tribool const b,
699 : CCC_Allocator const *const allocator
700 : ) {
701 1700 : if (!bitset || !allocator || b > CCC_TRUE || b < CCC_FALSE) {
702 3 : return CCC_RESULT_ARGUMENT_ERROR;
703 : }
704 1697 : CCC_Result const check_resize = maybe_resize(bitset, 1, allocator);
705 1697 : if (check_resize != CCC_RESULT_OK) {
706 3 : return check_resize;
707 : }
708 1694 : ++bitset->count;
709 1694 : set(block_at(bitset, bitset->count - 1), bitset->count - 1, b);
710 1694 : fix_end(bitset);
711 1694 : return CCC_RESULT_OK;
712 1700 : }
713 :
714 : CCC_Tribool
715 2279 : CCC_bitset_pop_back(CCC_Bitset *const bitset) {
716 2279 : if (!bitset || !bitset->count) {
717 1 : return CCC_TRIBOOL_ERROR;
718 : }
719 4556 : CCC_Tribool const was
720 2278 : = status(block_at(bitset, bitset->count - 1), bitset->count - 1);
721 2278 : --bitset->count;
722 2278 : fix_end(bitset);
723 2278 : return was;
724 2279 : }
725 :
726 : CCC_Tribool
727 1025 : CCC_bitset_any_range(
728 : CCC_Bitset const *const bitset,
729 : size_t const range_start_index,
730 : size_t const range_bit_count
731 : ) {
732 1025 : return any_or_none_range(
733 1025 : bitset, range_start_index, range_bit_count, CCC_TRUE
734 : );
735 : }
736 :
737 : CCC_Tribool
738 512 : CCC_bitset_any(CCC_Bitset const *const bitset) {
739 512 : return any_or_none_range(bitset, 0, bitset->count, CCC_TRUE);
740 : }
741 :
742 : CCC_Tribool
743 512 : CCC_bitset_none_range(
744 : CCC_Bitset const *const bitset,
745 : size_t const range_start_index,
746 : size_t const range_bit_count
747 : ) {
748 512 : return any_or_none_range(
749 512 : bitset, range_start_index, range_bit_count, CCC_FALSE
750 : );
751 : }
752 :
753 : CCC_Tribool
754 512 : CCC_bitset_none(CCC_Bitset const *const bitset) {
755 512 : return any_or_none_range(bitset, 0, bitset->count, CCC_FALSE);
756 : }
757 :
758 : CCC_Tribool
759 516 : CCC_bitset_all_range(
760 : CCC_Bitset const *const bitset,
761 : size_t const range_start_index,
762 : size_t const range_bit_count
763 : ) {
764 516 : return all_range(bitset, range_start_index, range_bit_count);
765 : }
766 :
767 : CCC_Tribool
768 513 : CCC_bitset_all(CCC_Bitset const *const bitset) {
769 513 : return all_range(bitset, 0, bitset->count);
770 : }
771 :
772 : CCC_Count
773 1023 : CCC_bitset_first_trailing_one_range(
774 : CCC_Bitset const *const bitset,
775 : size_t const range_start_index,
776 : size_t const range_bit_count
777 : ) {
778 1023 : return first_trailing_bit_range(
779 1023 : bitset, range_start_index, range_bit_count, CCC_TRUE
780 : );
781 1023 : }
782 :
783 : CCC_Count
784 511 : CCC_bitset_first_trailing_one(CCC_Bitset const *const bitset) {
785 511 : return first_trailing_bit_range(bitset, 0, bitset->count, CCC_TRUE);
786 511 : }
787 :
788 : CCC_Count
789 4289 : CCC_bitset_first_trailing_ones(
790 : CCC_Bitset const *const bitset, size_t const ones_count
791 : ) {
792 4289 : return first_trailing_bits_range(
793 4289 : bitset, 0, bitset->count, ones_count, CCC_TRUE
794 : );
795 4289 : }
796 :
797 : CCC_Count
798 4305 : CCC_bitset_first_trailing_ones_range(
799 : CCC_Bitset const *const bitset,
800 : size_t const range_start_index,
801 : size_t const range_bit_count,
802 : size_t const ones_count
803 : ) {
804 4305 : return first_trailing_bits_range(
805 4305 : bitset, range_start_index, range_bit_count, ones_count, CCC_TRUE
806 : );
807 4305 : }
808 :
809 : CCC_Count
810 1022 : CCC_bitset_first_trailing_zero_range(
811 : CCC_Bitset const *const bitset,
812 : size_t const range_start_index,
813 : size_t const range_bit_count
814 : ) {
815 1022 : return first_trailing_bit_range(
816 1022 : bitset, range_start_index, range_bit_count, CCC_FALSE
817 : );
818 1022 : }
819 :
820 : CCC_Count
821 511 : CCC_bitset_first_trailing_zero(CCC_Bitset const *const bitset) {
822 511 : return first_trailing_bit_range(bitset, 0, bitset->count, CCC_FALSE);
823 511 : }
824 :
825 : CCC_Count
826 4289 : CCC_bitset_first_trailing_zeros(
827 : CCC_Bitset const *const bitset, size_t const zeros_count
828 : ) {
829 4289 : return first_trailing_bits_range(
830 4289 : bitset, 0, bitset->count, zeros_count, CCC_FALSE
831 : );
832 4289 : }
833 :
834 : CCC_Count
835 4304 : CCC_bitset_first_trailing_zeros_range(
836 : CCC_Bitset const *const bitset,
837 : size_t const range_start_index,
838 : size_t const range_bit_count,
839 : size_t const zeros_count
840 : ) {
841 4304 : return first_trailing_bits_range(
842 4304 : bitset, range_start_index, range_bit_count, zeros_count, CCC_FALSE
843 : );
844 4304 : }
845 :
846 : CCC_Count
847 1029 : CCC_bitset_first_leading_one_range(
848 : CCC_Bitset const *const bitset,
849 : size_t const range_start_index,
850 : size_t const range_bit_count
851 : ) {
852 1029 : return first_leading_bit_range(
853 1029 : bitset, range_start_index, range_bit_count, CCC_TRUE
854 : );
855 1029 : }
856 :
857 : CCC_Count
858 512 : CCC_bitset_first_leading_one(CCC_Bitset const *const bitset) {
859 512 : return first_leading_bit_range(bitset, 0, bitset->count, CCC_TRUE);
860 512 : }
861 :
862 : CCC_Count
863 4280 : CCC_bitset_first_leading_ones(
864 : CCC_Bitset const *const bitset, size_t const ones_count
865 : ) {
866 4280 : return first_leading_bits_range(
867 4280 : bitset, 0, bitset->count, ones_count, CCC_TRUE
868 : );
869 4280 : }
870 :
871 : CCC_Count
872 4296 : CCC_bitset_first_leading_ones_range(
873 : CCC_Bitset const *const bitset,
874 : size_t const range_start_index,
875 : size_t const range_bit_count,
876 : size_t const ones_count
877 : ) {
878 4296 : return first_leading_bits_range(
879 4296 : bitset, range_start_index, range_bit_count, ones_count, CCC_TRUE
880 : );
881 4296 : }
882 :
883 : CCC_Count
884 1029 : CCC_bitset_first_leading_zero_range(
885 : CCC_Bitset const *const bitset,
886 : size_t const range_start_index,
887 : size_t const range_bit_count
888 : ) {
889 1029 : return first_leading_bit_range(
890 1029 : bitset, range_start_index, range_bit_count, CCC_FALSE
891 : );
892 1029 : }
893 :
894 : CCC_Count
895 512 : CCC_bitset_first_leading_zero(CCC_Bitset const *const bitset) {
896 512 : return first_leading_bit_range(bitset, 0, bitset->count, CCC_FALSE);
897 512 : }
898 :
899 : CCC_Count
900 4280 : CCC_bitset_first_leading_zeros(
901 : CCC_Bitset const *const bitset, size_t const zeros_count
902 : ) {
903 4280 : return first_leading_bits_range(
904 4280 : bitset, 0, bitset->count, zeros_count, CCC_FALSE
905 : );
906 4280 : }
907 :
908 : CCC_Count
909 4295 : CCC_bitset_first_leading_zeros_range(
910 : CCC_Bitset const *const bitset,
911 : size_t const range_start_index,
912 : size_t const range_bit_count,
913 : size_t const zeros_count
914 : ) {
915 4295 : return first_leading_bits_range(
916 4295 : bitset, range_start_index, range_bit_count, zeros_count, CCC_FALSE
917 : );
918 4295 : }
919 :
920 : CCC_Result
921 6 : CCC_bitset_clear(CCC_Bitset *const bitset) {
922 6 : if (!bitset) {
923 1 : return CCC_RESULT_ARGUMENT_ERROR;
924 : }
925 5 : if (bitset->blocks) {
926 5 : assert(bitset->capacity);
927 5 : (void)memset(
928 5 : bitset->blocks,
929 : CCC_FALSE,
930 5 : block_count(bitset->capacity) * SIZEOF_BLOCK
931 : );
932 5 : }
933 5 : bitset->count = 0;
934 5 : return CCC_RESULT_OK;
935 6 : }
936 :
937 : CCC_Result
938 11 : CCC_bitset_clear_and_free(
939 : CCC_Bitset *const bitset, CCC_Allocator const *const allocator
940 : ) {
941 11 : if (!bitset || !allocator) {
942 2 : return CCC_RESULT_ARGUMENT_ERROR;
943 : }
944 9 : if (!allocator->allocate) {
945 5 : return CCC_RESULT_NO_ALLOCATION_FUNCTION;
946 : }
947 4 : if (bitset->blocks) {
948 9 : (void)allocator->allocate((CCC_Allocator_arguments){
949 3 : .input = bitset->blocks,
950 : .bytes = 0,
951 3 : .context = allocator->context,
952 : });
953 3 : }
954 4 : bitset->count = 0;
955 4 : bitset->capacity = 0;
956 4 : bitset->blocks = NULL;
957 4 : return CCC_RESULT_OK;
958 11 : }
959 :
960 : CCC_Result
961 11 : CCC_bitset_reserve(
962 : CCC_Bitset *const bitset,
963 : size_t const to_add,
964 : CCC_Allocator const *const allocator
965 : ) {
966 11 : if (!bitset || !allocator || !allocator->allocate || !to_add) {
967 3 : return CCC_RESULT_ARGUMENT_ERROR;
968 : }
969 8 : return maybe_resize(bitset, to_add, allocator);
970 11 : }
971 :
972 : CCC_Result
973 8 : CCC_bitset_copy(
974 : CCC_Bitset *const destination,
975 : CCC_Bitset const *const source,
976 : CCC_Allocator const *const allocator
977 : ) {
978 8 : if (!destination || !source || !allocator
979 6 : || (destination->capacity < source->capacity && !allocator->allocate)) {
980 3 : return CCC_RESULT_ARGUMENT_ERROR;
981 : }
982 5 : if (!source->capacity) {
983 1 : destination->count = 0;
984 1 : return CCC_RESULT_OK;
985 : }
986 4 : if (destination->capacity < source->capacity) {
987 6 : Bit_block *const new_data
988 12 : = allocator->allocate((CCC_Allocator_arguments){
989 3 : .input = destination->blocks,
990 3 : .bytes = block_count(source->capacity) * SIZEOF_BLOCK,
991 3 : .context = allocator->context,
992 : });
993 3 : if (!new_data) {
994 1 : return CCC_RESULT_ALLOCATOR_ERROR;
995 : }
996 2 : destination->blocks = new_data;
997 2 : destination->capacity = source->capacity;
998 3 : }
999 3 : if (!source->blocks || !destination->blocks) {
1000 1 : return CCC_RESULT_ARGUMENT_ERROR;
1001 : }
1002 2 : destination->count = source->count;
1003 2 : (void)memcpy(
1004 2 : destination->blocks,
1005 2 : source->blocks,
1006 2 : block_count(source->capacity) * SIZEOF_BLOCK
1007 : );
1008 2 : fix_end(destination);
1009 2 : return CCC_RESULT_OK;
1010 8 : }
1011 :
1012 : void *
1013 2 : CCC_bitset_data(CCC_Bitset const *const bitset) {
1014 2 : if (!bitset) {
1015 1 : return NULL;
1016 : }
1017 1 : return bitset->blocks;
1018 2 : }
1019 :
1020 : CCC_Tribool
1021 7 : CCC_bitset_is_equal(
1022 : CCC_Bitset const *const left, CCC_Bitset const *const right
1023 : ) {
1024 7 : if (!left || !right) {
1025 2 : return CCC_TRIBOOL_ERROR;
1026 : }
1027 5 : if (left->count != right->count) {
1028 1 : return CCC_FALSE;
1029 : }
1030 4 : if (!left->count) {
1031 1 : return CCC_TRUE;
1032 : }
1033 6 : return memcmp(
1034 3 : left->blocks,
1035 3 : right->blocks,
1036 3 : block_count(left->count) * SIZEOF_BLOCK
1037 : )
1038 3 : == 0;
1039 7 : }
1040 :
1041 : /*========================= Private Interface =========================*/
1042 :
1043 : CCC_Result
1044 8 : CCC_private_bitset_reserve(
1045 : struct CCC_Bitset *const bitset,
1046 : size_t const to_add,
1047 : CCC_Allocator const *const allocator
1048 : ) {
1049 8 : return CCC_bitset_reserve(bitset, to_add, allocator);
1050 : }
1051 :
1052 : CCC_Tribool
1053 12 : CCC_private_bitset_set(
1054 : struct CCC_Bitset *const bitset, size_t const index, CCC_Tribool const bit
1055 : ) {
1056 12 : return CCC_bitset_set(bitset, index, bit);
1057 : }
1058 :
1059 : /*======================= Static Helpers ==============================*/
1060 :
1061 : /** Assumes set size is greater than or equal to subset size. */
1062 : static inline CCC_Tribool
1063 9 : is_subset_of(
1064 : struct CCC_Bitset const *const subset, struct CCC_Bitset const *const set
1065 : ) {
1066 9 : assert(set->count >= subset->count);
1067 69 : for (Block_count i = 0, end = block_count(subset->count); i < end; ++i) {
1068 : /* Invariant: the last N unused bits in a set are zero so this
1069 : * works. */
1070 60 : if ((set->blocks[i] & subset->blocks[i]) != subset->blocks[i]) {
1071 4 : return CCC_FALSE;
1072 : }
1073 56 : }
1074 5 : return CCC_TRUE;
1075 9 : }
1076 :
1077 : static CCC_Result
1078 1705 : maybe_resize(
1079 : struct CCC_Bitset *const bitset,
1080 : size_t const to_add,
1081 : CCC_Allocator const *const allocator
1082 : ) {
1083 1705 : size_t bits_needed = bitset->count + to_add;
1084 1705 : if (bits_needed <= bitset->capacity) {
1085 1692 : return CCC_RESULT_OK;
1086 : }
1087 13 : if (!allocator->allocate) {
1088 3 : return CCC_RESULT_NO_ALLOCATION_FUNCTION;
1089 : }
1090 10 : if (to_add == 1) {
1091 2 : bits_needed <<= 1;
1092 2 : }
1093 : static_assert(
1094 : (BIT_BLOCK_BITS & (BIT_BLOCK_BITS - 1)) == 0,
1095 : "rounding trick only works for powers of 2"
1096 : );
1097 20 : size_t const new_capacity
1098 10 : = (bits_needed + (BIT_BLOCK_BITS - 1)) & ~((size_t)BIT_BLOCK_BITS - 1);
1099 10 : if (new_capacity <= bitset->capacity) {
1100 1 : return CCC_RESULT_ARGUMENT_ERROR;
1101 : }
1102 18 : size_t const new_bytes
1103 9 : = block_count(new_capacity - bitset->count) * SIZEOF_BLOCK;
1104 18 : size_t const old_bytes
1105 9 : = bitset->count ? block_count(bitset->count) * SIZEOF_BLOCK : 0;
1106 36 : Bit_block *const new_data = allocator->allocate((CCC_Allocator_arguments){
1107 9 : .input = bitset->blocks,
1108 9 : .bytes = block_count(new_capacity) * SIZEOF_BLOCK,
1109 9 : .context = allocator->context,
1110 : });
1111 9 : if (!new_data) {
1112 1 : return CCC_RESULT_ALLOCATOR_ERROR;
1113 : }
1114 8 : (void)memset((char *)new_data + old_bytes, 0, new_bytes);
1115 8 : bitset->capacity = new_capacity;
1116 8 : bitset->blocks = new_data;
1117 8 : return CCC_RESULT_OK;
1118 1705 : }
1119 :
1120 : /** A trailing bit in a range is the first bit set to the specified boolean
1121 : value in the provided range. The input i gives the starting bit of the search,
1122 : meaning a bit within a block that is the inclusive start of the range. The count
1123 : gives us the end of the search for an overall range of `[i, i + count)`. This
1124 : means if the search range is greater than a single block we will iterate in
1125 : ascending order through our blocks and from least to most significant bit within
1126 : each block. */
1127 : static CCC_Count
1128 3067 : first_trailing_bit_range(
1129 : struct CCC_Bitset const *const bitset,
1130 : size_t const i,
1131 : size_t const count,
1132 : CCC_Tribool const is_one
1133 : ) {
1134 3067 : size_t const end_i = i + count;
1135 3067 : if (!bitset || !count || i >= bitset->count || end_i < i
1136 3066 : || end_i > bitset->count) {
1137 1 : return (CCC_Count){.error = CCC_RESULT_ARGUMENT_ERROR};
1138 : }
1139 3066 : Block_count start_block = block_count_index(i);
1140 3066 : Bit_count const start_bit = bit_count_index(i);
1141 3066 : Bit_block first_block_on = BIT_BLOCK_ON << start_bit;
1142 3066 : if (start_bit + count < BIT_BLOCK_BITS) {
1143 62 : first_block_on &= ~(BIT_BLOCK_ON << (start_bit + count));
1144 62 : }
1145 6132 : Bit_count trailing_zeros
1146 3066 : = is_one
1147 1533 : ? count_trailing_zeros(first_block_on & bitset->blocks[start_block])
1148 1533 : : count_trailing_zeros(
1149 1533 : first_block_on & ~bitset->blocks[start_block]
1150 : );
1151 3066 : if (trailing_zeros != BIT_BLOCK_BITS) {
1152 2108 : return (CCC_Count){
1153 1054 : .count = (start_block * BIT_BLOCK_BITS) + trailing_zeros,
1154 : };
1155 : }
1156 2012 : Block_count const end_block = block_count_index(end_i - 1);
1157 2012 : if (end_block == start_block) {
1158 64 : return (CCC_Count){.error = CCC_RESULT_FAIL};
1159 : }
1160 : /* Handle all values in between start and end in bulk. */
1161 15360 : while (++start_block < end_block) {
1162 14336 : trailing_zeros = is_one
1163 7168 : ? count_trailing_zeros(bitset->blocks[start_block])
1164 7168 : : count_trailing_zeros(~bitset->blocks[start_block]);
1165 14336 : if (trailing_zeros != BIT_BLOCK_BITS) {
1166 1848 : return (CCC_Count){
1167 924 : .count = (start_block * BIT_BLOCK_BITS) + trailing_zeros,
1168 : };
1169 : }
1170 : }
1171 : /* Handle last block. */
1172 1024 : Bit_count const end_bit = bit_count_index(end_i - 1);
1173 2048 : Bit_block const last_block_on
1174 1024 : = BIT_BLOCK_ON >> ((BIT_BLOCK_BITS - end_bit) - 1);
1175 : trailing_zeros
1176 1024 : = is_one
1177 512 : ? count_trailing_zeros(last_block_on & bitset->blocks[end_block])
1178 512 : : count_trailing_zeros(last_block_on & ~bitset->blocks[end_block]);
1179 1024 : if (trailing_zeros != BIT_BLOCK_BITS) {
1180 132 : return (CCC_Count){
1181 66 : .count = (end_block * BIT_BLOCK_BITS) + trailing_zeros,
1182 : };
1183 : }
1184 958 : return (CCC_Count){.error = CCC_RESULT_FAIL};
1185 3067 : }
1186 :
1187 : /** Finds the starting index of a sequence of 1's or 0's of the num_bits size in
1188 : linear time. The algorithm aims to efficiently skip as many bits as possible
1189 : while searching for the desired group. This avoids both an O(N^2) runtime and
1190 : the use of any unnecessary modulo or division operations in a hot loop. */
1191 : static CCC_Count
1192 17187 : first_trailing_bits_range(
1193 : struct CCC_Bitset const *const bitset,
1194 : size_t const i,
1195 : size_t const count,
1196 : size_t const num_bits,
1197 : CCC_Tribool const is_one
1198 : ) {
1199 17187 : size_t const range_end = i + count;
1200 17187 : if (!bitset || !count || i >= bitset->count || num_bits > count
1201 17180 : || range_end < i || range_end > bitset->count) {
1202 209 : return (CCC_Count){.error = CCC_RESULT_ARGUMENT_ERROR};
1203 : }
1204 16978 : size_t num_found = 0;
1205 16978 : size_t bits_start = i;
1206 16978 : Block_count cur_block = block_count_index(i);
1207 16978 : size_t cur_end = (cur_block * BIT_BLOCK_BITS) + BIT_BLOCK_BITS;
1208 16978 : Bit_count bit_i = bit_count_index(i);
1209 125890 : for (;;) {
1210 : /* After the first iteration the bit index is always 0, so the
1211 : supplemental AND with the shifted expression returns the
1212 : original block. Makes code simpler to leave it. Test if this
1213 : is costly (I think probably not).*/
1214 251780 : Bit_block bits
1215 125890 : = is_one ? bitset->blocks[cur_block] & (BIT_BLOCK_ON << bit_i)
1216 62945 : : ~bitset->blocks[cur_block] & (BIT_BLOCK_ON << bit_i);
1217 125890 : if (cur_end > range_end) {
1218 6294 : bits &= ~(BIT_BLOCK_ON << bit_count_index(range_end));
1219 6294 : }
1220 125890 : struct Group_count const ones
1221 125890 : = max_trailing_ones(bits, bit_i, num_bits - num_found);
1222 125890 : if (ones.count >= num_bits) {
1223 : /* Found the solution all at once within a block. */
1224 5076 : return (CCC_Count){
1225 2538 : .count = (cur_block * BIT_BLOCK_BITS) + ones.index,
1226 : };
1227 : }
1228 123352 : if (!ones.index) {
1229 10538 : if (num_found + ones.count >= num_bits) {
1230 : /* Found solution crossing block boundary from
1231 : * prefix blocks. */
1232 6038 : return (CCC_Count){.count = bits_start};
1233 : }
1234 : /* Found a full block so keep on trucking. */
1235 4500 : num_found += ones.count;
1236 4500 : } else {
1237 : /* Fail but we have largest skip possible to continue
1238 : our search from in order to save double checking
1239 : unnecessary prefixes. */
1240 112814 : bits_start = (cur_block * BIT_BLOCK_BITS) + ones.index;
1241 112814 : num_found = ones.count;
1242 : }
1243 117314 : if (bits_start + num_bits > range_end) {
1244 8402 : return (CCC_Count){.error = CCC_RESULT_FAIL};
1245 : }
1246 108912 : bit_i = 0;
1247 108912 : ++cur_block;
1248 108912 : cur_end += BIT_BLOCK_BITS;
1249 125890 : }
1250 17187 : }
1251 :
1252 : /** Returns the maximum group of consecutive ones in the bit block given. If the
1253 : number of consecutive ones remaining cannot be found the function returns
1254 : where the next search should start from, a critical step to a linear search;
1255 : specifically, we seek any group of continuous ones that runs from some index
1256 : in the block to the end of the block.
1257 :
1258 : If no continuous group of ones exist that runs to the end of the block, the
1259 : BLOCK_BITS index is returned with a group size of 0 meaning the search for ones
1260 : will need to continue in the next block. This is helpful for the main search
1261 : loop adding to its start index and number of ones found so far. */
1262 : static inline struct Group_count
1263 125890 : max_trailing_ones(
1264 : Bit_block const block, Bit_count bit_index, size_t const ones_remain
1265 : ) {
1266 : /* Easy exit skip to the next block. Helps with sparse sets. */
1267 125890 : if (!block) {
1268 97184 : return (struct Group_count){.index = BIT_BLOCK_BITS};
1269 : }
1270 28706 : if (ones_remain <= BIT_BLOCK_BITS) {
1271 18946 : assert(ones_remain);
1272 18946 : assert(bit_index < BIT_BLOCK_BITS);
1273 18946 : Bit_block block_bits = block >> bit_index;
1274 37892 : Bit_block const required_mask
1275 18946 : = BIT_BLOCK_ON >> (BIT_BLOCK_BITS - ones_remain);
1276 : /* The loop continues only while our block is numerically greater than
1277 : the mask. Because unsigned integers are represented in base 2 we get
1278 : two automatic early exits here.
1279 :
1280 : - If the block is missing a high-order bit in the required mask,
1281 : it is numerically smaller than the mask and cannot match with
1282 : further shifting.
1283 :
1284 : - If all high bits match but some lower required bits are zero,
1285 : the block is numerically smaller than the mask and cannot match
1286 : with further shifting.
1287 :
1288 : If the block has high order bits not in the mask it is numerically
1289 : greater than the mask and we continue checking, which is correct.
1290 : This strategy optimizes out some useless shifts. */
1291 64098 : while (block_bits >= required_mask) {
1292 53728 : if ((required_mask & block_bits) == required_mask) {
1293 25728 : return (struct Group_count){
1294 8576 : .index = bit_index,
1295 8576 : .count = (Bit_count)ones_remain,
1296 : };
1297 : }
1298 45152 : ++bit_index;
1299 45152 : block_bits >>= 1;
1300 : }
1301 18946 : }
1302 : /* 2 cases covered: the ones remaining are greater than this block could
1303 : hold or we did not find a match by the masking we just did. In either
1304 : case we need the maximum contiguous ones that run all the way to the
1305 : MSB. The best we could have is a full block of 1's. Otherwise we need
1306 : to find where to start our new search for contiguous 1's. This could
1307 : be the next block if there are not 1's that continue all the way to
1308 : MSB. */
1309 20130 : Bit_count const leading_ones = count_leading_zeros(~block);
1310 60390 : return (struct Group_count){
1311 20130 : .index = BIT_BLOCK_BITS - leading_ones,
1312 20130 : .count = leading_ones,
1313 : };
1314 125890 : }
1315 :
1316 : /** A leading bit is the first bit in the range to be set to the indicated value
1317 : within a block starting the search from the Most Significant Bit of each block.
1318 : This means that if the range is larger than a single block we iterate in
1319 : descending order through the set of blocks starting at `i + count - 1` for the
1320 : range of `[i, i + count)`. The search within a given block proceeds from Most
1321 : Significant Bit toward Least Significant Bit. */
1322 : static CCC_Count
1323 3082 : first_leading_bit_range(
1324 : struct CCC_Bitset const *const bitset,
1325 : size_t const i,
1326 : size_t const count,
1327 : CCC_Tribool const is_one
1328 : ) {
1329 3082 : if (!bitset || !count || i >= bitset->count || i + count <= i
1330 3082 : || i + count > bitset->count) {
1331 1022 : return (CCC_Count){.error = CCC_RESULT_ARGUMENT_ERROR};
1332 : }
1333 2060 : size_t const end_i = i;
1334 2060 : size_t const start_i = i + count - 1;
1335 2060 : Bit_count const start_bit = bit_count_index(start_i);
1336 2060 : Bit_count const end_bit = bit_count_index(end_i);
1337 2060 : Block_count start_block = block_count_index(start_i);
1338 4120 : Bit_block first_block_on
1339 2060 : = BIT_BLOCK_ON >> ((BIT_BLOCK_BITS - start_bit) - 1);
1340 2060 : if (start_i - end_i + 1 < BIT_BLOCK_BITS) {
1341 72 : first_block_on &= (BIT_BLOCK_ON << end_bit);
1342 72 : }
1343 4120 : Bit_count leading_zeros
1344 2060 : = is_one
1345 1030 : ? count_leading_zeros(first_block_on & bitset->blocks[start_block])
1346 1030 : : count_leading_zeros(
1347 1030 : first_block_on & ~bitset->blocks[start_block]
1348 : );
1349 2060 : if (leading_zeros != BIT_BLOCK_BITS) {
1350 2128 : return (CCC_Count){
1351 1064 : .count = (start_block * BIT_BLOCK_BITS)
1352 1064 : + (Block_count)(BIT_BLOCK_BITS - leading_zeros - 1),
1353 : };
1354 : }
1355 996 : Block_count const end_block = block_count_index(end_i);
1356 996 : if (start_block == end_block) {
1357 2 : return (CCC_Count){.error = CCC_RESULT_FAIL};
1358 : }
1359 7770 : while (--start_block > end_block) {
1360 7700 : leading_zeros = is_one
1361 3850 : ? count_leading_zeros(bitset->blocks[start_block])
1362 3850 : : count_leading_zeros(~bitset->blocks[start_block]);
1363 7700 : if (leading_zeros != BIT_BLOCK_BITS) {
1364 1848 : return (CCC_Count){
1365 924 : .count = (start_block * BIT_BLOCK_BITS)
1366 924 : + (Block_count)(BIT_BLOCK_BITS - leading_zeros - 1),
1367 : };
1368 : }
1369 : }
1370 : /* Handle last block. */
1371 70 : Bit_block const last_block_on = (BIT_BLOCK_ON << end_bit);
1372 : leading_zeros
1373 70 : = is_one
1374 35 : ? count_leading_zeros(last_block_on & bitset->blocks[end_block])
1375 35 : : count_leading_zeros(last_block_on & ~bitset->blocks[end_block]);
1376 70 : if (leading_zeros != BIT_BLOCK_BITS) {
1377 136 : return (CCC_Count){
1378 68 : .count = (end_block * BIT_BLOCK_BITS)
1379 68 : + (Block_count)(BIT_BLOCK_BITS - leading_zeros - 1),
1380 : };
1381 : }
1382 2 : return (CCC_Count){.error = CCC_RESULT_FAIL};
1383 3082 : }
1384 :
1385 : /* Overall I am not thrilled with the need for handling signed values and back
1386 : wards iteration in this version. However, I am having trouble finding a clean
1387 : way to do this unsigned. Signed simplifies the iteration and interaction with
1388 : the helper function finding leading ones because the algorithm is complex
1389 : enough as is. Candidate for refactor. */
1390 : static CCC_Count
1391 17151 : first_leading_bits_range(
1392 : struct CCC_Bitset const *const bitset,
1393 : size_t const i,
1394 : size_t const count,
1395 : size_t const num_bits,
1396 : CCC_Tribool const is_one
1397 : ) {
1398 : /* The only risk is that i is out of range of `ptrdiff_t` which would
1399 : mean that we cannot proceed with algorithm. However, this is unlikely
1400 : on most platforms as they often bound object size by the max pointer
1401 : difference possible, which this type provides. However, it is not
1402 : required by the C standard so we are obligated to check. */
1403 17151 : ptrdiff_t const range_end = (ptrdiff_t)(i - 1);
1404 17151 : if (!bitset || !count || num_bits > PTRDIFF_MAX || i > PTRDIFF_MAX
1405 17150 : || i >= bitset->count || !num_bits || num_bits > count
1406 17150 : || range_end < -1) {
1407 1 : return (CCC_Count){.error = CCC_RESULT_ARGUMENT_ERROR};
1408 : }
1409 17150 : size_t num_found = 0;
1410 17150 : ptrdiff_t bits_start = (ptrdiff_t)(i + count - 1);
1411 : /* If we passed the earlier signed range check this cast is safe because
1412 : (i / block bits) for some block index must be less than i. */
1413 34300 : Block_signed_count cur_block
1414 17150 : = (Block_signed_count)block_count_index((size_t)bits_start);
1415 17150 : ptrdiff_t cur_end = (ptrdiff_t)((cur_block * BIT_BLOCK_BITS) - 1);
1416 17150 : Bit_signed_count bit_index = bit_count_index((size_t)bits_start);
1417 130658 : for (;;) {
1418 0 : assert(
1419 130658 : cur_block >= 0
1420 130658 : && "current block is safe as index protected by bits_start "
1421 : "iterating toward the end of the range"
1422 : );
1423 : /* After the first iteration the bit index is always the Most
1424 : Significant bit of the block, so the supplemental AND with
1425 : the shifted expression returns the original block. Makes code
1426 : simpler to leave it. Test if this is costly (I think probably
1427 : not).*/
1428 261316 : Bit_block bits
1429 130658 : = is_one
1430 65329 : ? bitset->blocks[cur_block]
1431 65329 : & (BIT_BLOCK_ON >> ((BIT_BLOCK_BITS - bit_index) - 1))
1432 65329 : : ~bitset->blocks[cur_block]
1433 65329 : & (BIT_BLOCK_ON >> ((BIT_BLOCK_BITS - bit_index) - 1));
1434 130658 : if (cur_end < range_end) {
1435 0 : assert(
1436 5532 : range_end + 1 >= 0
1437 5532 : && "If range end is less than -1 it is caught at entry to "
1438 : "function"
1439 : );
1440 5532 : bits &= (BIT_BLOCK_ON << bit_count_index((size_t)(range_end + 1)));
1441 5532 : }
1442 130658 : struct Group_signed_count const ones
1443 130658 : = max_leading_ones(bits, bit_index, num_bits - num_found);
1444 130658 : if ((size_t)ones.count >= num_bits) {
1445 0 : assert(
1446 2532 : ones.index >= 0
1447 2532 : && "The index cannot be negative if ones were found and num "
1448 : "bits is positive non-zero."
1449 : );
1450 5064 : return (CCC_Count){
1451 : .count
1452 2532 : = ((size_t)cur_block * BIT_BLOCK_BITS) + (size_t)ones.index,
1453 : };
1454 : }
1455 128126 : if (ones.index == BIT_BLOCK_BITS - 1) {
1456 11408 : num_found += (size_t)ones.count;
1457 : /* Continuation from prefix blocks has resulted in success. */
1458 11408 : if (num_found >= num_bits) {
1459 0 : assert(
1460 6026 : bits_start >= 0
1461 6026 : && "Bits starting point cannot be less than end of range "
1462 : "or end of range plus number of bits. Either guarantees "
1463 : "positive."
1464 : );
1465 6026 : return (CCC_Count){.count = (size_t)bits_start};
1466 : }
1467 5382 : } else {
1468 : /* If the new block start index is -1, then this addition bumps us
1469 : to the next block's Most Significant Bit .*/
1470 : bits_start
1471 116718 : = (cur_block * (Bit_signed_count)BIT_BLOCK_BITS) + ones.index;
1472 116718 : num_found = (size_t)ones.count;
1473 : }
1474 : /* Cast was checked at entry to function for safety. */
1475 122100 : if (bits_start < range_end + (ptrdiff_t)num_bits) {
1476 8592 : return (CCC_Count){.error = CCC_RESULT_FAIL};
1477 : }
1478 113508 : bit_index = BIT_BLOCK_BITS - 1;
1479 113508 : --cur_block;
1480 113508 : cur_end -= BIT_BLOCK_BITS;
1481 130658 : }
1482 17151 : }
1483 :
1484 : /** Returns the maximum group of consecutive ones in the bit block given. If the
1485 : number of consecutive ones remaining cannot be found the function returns
1486 : where the next search should start from, a critical step to a linear search;
1487 : specifically, we seek any group of continuous ones that runs from some index
1488 : in the block to the start of the block (0th index).
1489 :
1490 : If no continuous group of ones exist that runs to the start of the block, a -1
1491 : index is returned with a group size of 0 meaning the search for ones will need
1492 : to continue in the next block lower block. This is helpful for the main search
1493 : loop adding to its start index and number of ones found so far. Adding -1 is
1494 : just subtraction so this will correctly drop us to the top bit of the next Least
1495 : Significant Block to continue the search. */
1496 : static struct Group_signed_count
1497 130658 : max_leading_ones(
1498 : Bit_block const block,
1499 : Bit_signed_count bit_index,
1500 : size_t const ones_remaining
1501 : ) {
1502 130658 : if (!block) {
1503 96352 : return (struct Group_signed_count){.index = -1};
1504 : }
1505 34306 : if (ones_remaining <= BIT_BLOCK_BITS) {
1506 22810 : assert(bit_index < BIT_BLOCK_BITS);
1507 22810 : Bit_block block_bits = block << (BIT_BLOCK_BITS - bit_index - 1);
1508 45620 : Bit_block const required_mask = BIT_BLOCK_ON
1509 22810 : << (BIT_BLOCK_BITS - ones_remaining);
1510 22810 : Bit_signed_count const end = (Bit_signed_count)ones_remaining;
1511 193018 : while (bit_index >= end) {
1512 178502 : if ((required_mask & block_bits) == required_mask) {
1513 24882 : return (struct Group_signed_count){
1514 8294 : .index = bit_index,
1515 8294 : .count = end,
1516 : };
1517 : }
1518 170208 : --bit_index;
1519 170208 : block_bits <<= 1;
1520 : }
1521 22810 : }
1522 52024 : Bit_signed_count const trailing_ones
1523 26012 : = (Bit_signed_count)count_trailing_zeros(~block);
1524 26012 : assert(trailing_ones >= 0);
1525 78036 : return (struct Group_signed_count){
1526 : /* May be -1 if no ones found. This make backward iteration easier. */
1527 26012 : .index = (Bit_signed_count)(trailing_ones - 1),
1528 26012 : .count = trailing_ones,
1529 : };
1530 130658 : }
1531 :
1532 : /** Performs the any or none scan operation over the specified range. The only
1533 : difference between the operations is the return value. Specify the desired
1534 : Tribool value to return upon encountering an on bit. For any this is
1535 : CCC_TRUE. For none this is CCC_FALSE. Saves writing two identical fns. */
1536 : static CCC_Tribool
1537 2561 : any_or_none_range(
1538 : struct CCC_Bitset const *const bitset,
1539 : size_t const i,
1540 : size_t const count,
1541 : CCC_Tribool const ret
1542 : ) {
1543 2561 : size_t const end_i = i + count;
1544 2561 : if (!bitset || !count || i >= bitset->count || end_i < i
1545 2560 : || end_i > bitset->count || ret < CCC_FALSE || ret > CCC_TRUE) {
1546 1 : return CCC_TRIBOOL_ERROR;
1547 : }
1548 2560 : Block_count start_block = block_count_index(i);
1549 2560 : Bit_count const start_bit = bit_count_index(i);
1550 2560 : Bit_block first_block_on = BIT_BLOCK_ON << start_bit;
1551 2560 : if (start_bit + count < BIT_BLOCK_BITS) {
1552 15 : first_block_on &= ~(BIT_BLOCK_ON << (start_bit + count));
1553 15 : }
1554 2560 : if (first_block_on & bitset->blocks[start_block]) {
1555 160 : return ret;
1556 : }
1557 2400 : Block_count const end_block = block_count_index(end_i - 1);
1558 2400 : if (end_block == start_block) {
1559 16 : return !ret;
1560 : }
1561 : /* If this is the any check we might get lucky by checking the last
1562 : block before looping over everything. */
1563 2384 : Bit_count const end_bit = bit_count_index(end_i - 1);
1564 4768 : Bit_block const last_block_on
1565 2384 : = BIT_BLOCK_ON >> ((BIT_BLOCK_BITS - end_bit) - 1);
1566 2384 : if (last_block_on & bitset->blocks[end_block]) {
1567 224 : return ret;
1568 : }
1569 20864 : for (++start_block; start_block < end_block; ++start_block) {
1570 19600 : if (bitset->blocks[start_block] & BIT_BLOCK_ON) {
1571 896 : return ret;
1572 : }
1573 18704 : }
1574 1264 : return !ret;
1575 2561 : }
1576 :
1577 : /** Check for all on is slightly different from the any or none checks so we
1578 : need a painfully repetitive function. */
1579 : static CCC_Tribool
1580 1029 : all_range(
1581 : struct CCC_Bitset const *const bitset, size_t const i, size_t const count
1582 : ) {
1583 1029 : size_t const end = i + count;
1584 1029 : if (!bitset || !count || i >= bitset->count || end < i
1585 1028 : || end > bitset->count) {
1586 1 : return CCC_TRIBOOL_ERROR;
1587 : }
1588 1028 : Block_count start_block = block_count_index(i);
1589 1028 : Bit_count const start_bit = bit_count_index(i);
1590 1028 : Bit_block first_block_on = BIT_BLOCK_ON << start_bit;
1591 1028 : if (start_bit + count < BIT_BLOCK_BITS) {
1592 2 : first_block_on &= ~(BIT_BLOCK_ON << (start_bit + count));
1593 2 : }
1594 1028 : if ((first_block_on & bitset->blocks[start_block]) != first_block_on) {
1595 768 : return CCC_FALSE;
1596 : }
1597 260 : Block_count const end_block = block_count_index(end - 1);
1598 260 : if (end_block == start_block) {
1599 1 : return CCC_TRUE;
1600 : }
1601 2075 : while (++start_block < end_block) {
1602 1817 : if (bitset->blocks[start_block] != BIT_BLOCK_ON) {
1603 1 : return CCC_FALSE;
1604 : }
1605 : }
1606 258 : Bit_count const end_bit = bit_count_index(end - 1);
1607 516 : Bit_block const last_block_on
1608 258 : = BIT_BLOCK_ON >> ((BIT_BLOCK_BITS - end_bit) - 1);
1609 258 : if ((last_block_on & bitset->blocks[end_block]) != last_block_on) {
1610 1 : return CCC_FALSE;
1611 : }
1612 257 : return CCC_TRUE;
1613 1029 : }
1614 :
1615 : /** Given the 0 based index from `[0, count of used bits in set)` returns a
1616 : reference to the block that such a bit belongs to. This block reference will
1617 : point to some block at index [0, count of blocks used in the set). */
1618 : static inline Bit_block *
1619 38404 : block_at(struct CCC_Bitset const *const bitset, size_t const bitset_index) {
1620 38404 : return &bitset->blocks[block_count_index(bitset_index)];
1621 : }
1622 :
1623 : /** Sets all bits in bulk to value b and fixes the end block to ensure all bits
1624 : in the final block that are not in use are zeroed out. */
1625 : static inline void
1626 36 : set_all(struct CCC_Bitset *const bitset, CCC_Tribool const b) {
1627 72 : (void)memset(
1628 72 : bitset->blocks, b ? ~0 : 0, block_count(bitset->count) * SIZEOF_BLOCK
1629 : );
1630 36 : fix_end(bitset);
1631 36 : }
1632 :
1633 : /** Given the appropriate block in which bit_i resides, sets the bits position
1634 : to 0 or 1 as specified by the CCC_Tribool argument b.
1635 :
1636 : Assumes block has been retrieved correctly in range [0, count of blocks in set)
1637 : and that bit_i is in range [0, count of active bits in set). */
1638 : static inline void
1639 12916 : set(Bit_block *const block, size_t const bitset_index, CCC_Tribool const b) {
1640 12916 : if (b) {
1641 7889 : *block |= on(bitset_index);
1642 7889 : } else {
1643 5027 : *block &= ~on(bitset_index);
1644 : }
1645 12916 : }
1646 :
1647 : /** Given the bit set and the set index--set index is allowed to be greater than
1648 : the size of one block--returns the status of the bit at that index. */
1649 : static inline CCC_Tribool
1650 17640 : status(Bit_block const *const bitset, size_t const bitset_index) {
1651 : /* Be careful. The & op does not promise to evaluate to 1 or 0. We often
1652 : just use it where that conversion takes place implicitly for us. */
1653 17640 : return (*bitset & on(bitset_index)) != 0;
1654 : }
1655 :
1656 : /** Given the true bit index in the bit set, expected to be in the range
1657 : [0, count of active bits in set), returns a Bit_block mask with only this bit
1658 : on in block to which it belongs. This mask guarantees to have a bit on within
1659 : a bit block at index [0, BIT_BLOCK_BITS - 1). */
1660 : static inline Bit_block
1661 30560 : on(size_t bitset_index) {
1662 30560 : return (Bit_block)1 << bit_count_index(bitset_index);
1663 : }
1664 :
1665 : /** Clears unused bits in the last block according to count. Sets the last block
1666 : to have only the used bits set to their given values and all bits after to zero.
1667 : This is used as a safety mechanism throughout the code after complex operations
1668 : on bit blocks to ensure any side effects on unused bits are deleted. */
1669 : static inline void
1670 19092 : fix_end(struct CCC_Bitset *const bitset) {
1671 : /* Remember, we fill from LSB to MSB so we want the mask to start at
1672 : lower order bit which is why we do the second funky flip on the whole
1673 : op. */
1674 19092 : bitset->count
1675 19072 : ? (*block_at(bitset, bitset->count - 1)
1676 38144 : &= ~(((Bit_block)~1) << bit_count_index(bitset->count - 1)))
1677 20 : : (bitset->blocks[0] = 0);
1678 19092 : }
1679 :
1680 : /** Returns the 0-based index of the block in the block array allocation to
1681 : which the given index belongs. Assumes the given index is somewhere between [0,
1682 : count of bits set). The returned index then represents the block in which this
1683 : index resides which is in the range [0, block containing last in use bit). */
1684 : static inline Block_count
1685 135496 : block_count_index(size_t const bitset_index) {
1686 : static_assert(
1687 : (typeof(bitset_index))~((typeof(bitset_index))0)
1688 : >= (typeof(bitset_index))0,
1689 : "shifting to avoid division with power of 2 divisor is only "
1690 : "defined for unsigned types"
1691 : );
1692 135496 : return bitset_index >> BIT_BLOCK_BITS_LOG2;
1693 : }
1694 :
1695 : /** Returns the 0-based index within a block to which the given index belongs.
1696 : This index will always be between [0, BIT_BLOCK_BITS - 1). */
1697 : static inline Bit_count
1698 156040 : bit_count_index(size_t const bitset_index) {
1699 156040 : return bitset_index & (BIT_BLOCK_BITS - 1);
1700 : }
1701 :
1702 : /** Returns the number of blocks required to store the given bits. Assumes bits
1703 : is non-zero. For any bits > 1 the block count is always less than bits.*/
1704 : static inline Block_count
1705 9383 : block_count(size_t const bit_count) {
1706 : static_assert(
1707 : (typeof(bit_count))~((typeof(bit_count))0) >= (typeof(bit_count))0,
1708 : "shifting to avoid division with power of 2 divisor is only "
1709 : "defined for unsigned types"
1710 : );
1711 9383 : assert(bit_count);
1712 9383 : return (bit_count + (BIT_BLOCK_BITS - 1)) >> BIT_BLOCK_BITS_LOG2;
1713 : }
1714 :
1715 : /** Returns min of size_t arguments. Beware of conversions. */
1716 : static inline size_t
1717 6 : size_t_min(size_t const a, size_t const b) {
1718 6 : return a < b ? a : b;
1719 : }
1720 :
1721 : /** The following asserts assure that whether portable or built in bit
1722 : operations are used in the coming section we are safe in our assumptions about
1723 : widths and counts. */
1724 : static_assert(BIT_BLOCK_MSB < BIT_BLOCK_ON);
1725 : static_assert(SIZEOF_BLOCK == sizeof(unsigned));
1726 :
1727 : /** Much of the code relies on the assumption that iterating over blocks at
1728 : at a time is faster than using mathematical operations to conceptually iterate
1729 : over bits. This assumptions mostly comes from the use of these built-ins to
1730 : keep the processing time linear for range based queries, while avoiding division
1731 : and modulo operations. I should test to see the performance implications when
1732 : these built-ins are gone. However they are pretty ubiquitous these days. */
1733 :
1734 : /** Built-ins are common on Clang and GCC but we have portable fallback. */
1735 : #if defined(__has_builtin) && __has_builtin(__builtin_ctz) \
1736 : && __has_builtin(__builtin_clz) && __has_builtin(__builtin_popcount)
1737 :
1738 : /** Counts the on bits in a bit block. */
1739 : static inline Bit_count
1740 227961 : popcount(Bit_block const b) {
1741 : /* There are different pop counts for different integer widths. Be sure
1742 : to catch the use of the wrong one by mistake here at compile time. */
1743 : static_assert(__builtin_popcount((Bit_block)~0) <= U8_BLOCK_MAX);
1744 227961 : return (Bit_count)__builtin_popcount(b);
1745 : }
1746 :
1747 : /** Counts the number of trailing zeros in a bit block starting from least
1748 : significant bit. */
1749 : static inline Bit_count
1750 44438 : count_trailing_zeros(Bit_block const b) {
1751 : static_assert(__builtin_ctz(BIT_BLOCK_MSB) <= U8_BLOCK_MAX);
1752 44438 : return b ? (Bit_count)__builtin_ctz(b) : BIT_BLOCK_BITS;
1753 : }
1754 :
1755 : /** Counts the leading zeros in a bit block starting from the most significant
1756 : bit. */
1757 : static inline Bit_count
1758 29960 : count_leading_zeros(Bit_block const b) {
1759 : static_assert(__builtin_clz((Bit_block)1) <= U8_BLOCK_MAX);
1760 29960 : return b ? (Bit_count)__builtin_clz(b) : BIT_BLOCK_BITS;
1761 : }
1762 :
1763 : #else /* !defined(__has_builtin) || !__has_builtin(__builtin_ctz) \
1764 : || !__has_builtin(__builtin_clz) || !__has_builtin(__builtin_popcount) */
1765 :
1766 : /** Counts the on bits in a bit block. */
1767 : static inline Bit_count
1768 : popcount(Bit_block b) {
1769 : Bit_count cnt = 0;
1770 : for (; b; cnt += ((b & 1U) != 0), b >>= 1U) {}
1771 : return cnt;
1772 : }
1773 :
1774 : /** Counts the number of trailing zeros in a bit block starting from least
1775 : significant bit. */
1776 : static inline Bit_count
1777 : count_trailing_zeros(Bit_block b) {
1778 : if (!b) {
1779 : return BIT_BLOCK_BITS;
1780 : }
1781 : Bit_count cnt = 0;
1782 : for (; (b & 1U) == 0; ++cnt, b >>= 1U) {}
1783 : return cnt;
1784 : }
1785 :
1786 : /** Counts the leading zeros in a bit block starting from the most significant
1787 : bit. */
1788 : static inline Bit_count
1789 : count_leading_zeros(Bit_block b) {
1790 : if (!b) {
1791 : return BIT_BLOCK_BITS;
1792 : }
1793 : Bit_count cnt = 0;
1794 : for (; (b & BIT_BLOCK_MSB) == 0; ++cnt, b <<= 1U) {}
1795 : return cnt;
1796 : }
1797 :
1798 : #endif /* defined(__has_builtin) && __has_builtin(__builtin_ctz) \
1799 : && __has_builtin(__builtin_clz) && __has_builtin(__builtin_popcount) */
|