Skip to content

Commit 162b2a8

Browse files
committed
Use standard BitIndexIterator instead of specialized u32
1 parent 556e673 commit 162b2a8

File tree

3 files changed

+6
-174
lines changed

3 files changed

+6
-174
lines changed

arrow-buffer/src/buffer/boolean.rs

Lines changed: 1 addition & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616
// under the License.
1717

1818
use crate::bit_chunk_iterator::BitChunks;
19-
use crate::bit_iterator::{BitIndexIterator, BitIndexU32Iterator, BitIterator, BitSliceIterator};
19+
use crate::bit_iterator::{BitIndexIterator, BitIterator, BitSliceIterator};
2020
use crate::{
2121
bit_util, buffer_bin_and, buffer_bin_or, buffer_bin_xor, buffer_unary_not,
2222
BooleanBufferBuilder, Buffer, MutableBuffer,
@@ -208,11 +208,6 @@ impl BooleanBuffer {
208208
BitIndexIterator::new(self.values(), self.offset, self.len)
209209
}
210210

211-
/// Returns a `u32` iterator over set bit positions without any usize->u32 conversion
212-
pub fn set_indices_u32(&self) -> BitIndexU32Iterator<'_> {
213-
BitIndexU32Iterator::new(self.values(), self.offset, self.len)
214-
}
215-
216211
/// Returns a [`BitSliceIterator`] yielding contiguous ranges of set bits
217212
pub fn set_slices(&self) -> BitSliceIterator<'_> {
218213
BitSliceIterator::new(self.values(), self.offset, self.len)

arrow-buffer/src/util/bit_iterator.rs

Lines changed: 1 addition & 164 deletions
Original file line numberDiff line numberDiff line change
@@ -216,7 +216,7 @@ impl<'a> BitIndexIterator<'a> {
216216
impl Iterator for BitIndexIterator<'_> {
217217
type Item = usize;
218218

219-
#[inline]
219+
#[inline(always)]
220220
fn next(&mut self) -> Option<Self::Item> {
221221
loop {
222222
if self.current_chunk != 0 {
@@ -231,63 +231,6 @@ impl Iterator for BitIndexIterator<'_> {
231231
}
232232
}
233233

234-
/// An iterator of u32 whose index in a provided bitmask is true
235-
/// Respects arbitrary offsets and slice lead/trail padding exactly like BitIndexIterator
236-
#[derive(Debug)]
237-
pub struct BitIndexU32Iterator<'a> {
238-
curr: u64,
239-
chunk_offset: i64,
240-
iter: UnalignedBitChunkIterator<'a>,
241-
}
242-
243-
impl<'a> BitIndexU32Iterator<'a> {
244-
/// Create a new [BitIndexU32Iterator] from the provided buffer,
245-
/// offset and len in bits.
246-
pub fn new(buffer: &'a [u8], offset: usize, len: usize) -> Self {
247-
// Build the aligned chunks (including prefix/suffix masked)
248-
let chunks = UnalignedBitChunk::new(buffer, offset, len);
249-
let mut iter = chunks.iter();
250-
251-
// First 64-bit word (masked for lead padding), or 0 if empty
252-
let curr = iter.next().unwrap_or(0);
253-
// Negative lead padding ensures the first bit in curr maps to index 0
254-
let chunk_offset = -(chunks.lead_padding() as i64);
255-
256-
Self {
257-
curr,
258-
chunk_offset,
259-
iter,
260-
}
261-
}
262-
}
263-
264-
impl<'a> Iterator for BitIndexU32Iterator<'a> {
265-
type Item = u32;
266-
267-
#[inline(always)]
268-
fn next(&mut self) -> Option<u32> {
269-
loop {
270-
if self.curr != 0 {
271-
// Position of least-significant set bit
272-
let tz = self.curr.trailing_zeros();
273-
// Clear that bit
274-
self.curr &= self.curr - 1;
275-
// Return global index = chunk_offset + tz
276-
return Some((self.chunk_offset + tz as i64) as u32);
277-
}
278-
// Advance to next 64-bit chunk
279-
match self.iter.next() {
280-
Some(next_chunk) => {
281-
// Move offset forward by 64 bits
282-
self.chunk_offset += 64;
283-
self.curr = next_chunk;
284-
}
285-
None => return None,
286-
}
287-
}
288-
}
289-
}
290-
291234
/// Calls the provided closure for each index in the provided null mask that is set,
292235
/// using an adaptive strategy based on the null count
293236
///
@@ -380,110 +323,4 @@ mod tests {
380323
let mask = &[223, 23];
381324
BitIterator::new(mask, 17, 0);
382325
}
383-
384-
#[test]
385-
fn test_bit_index_u32_iterator_basic() {
386-
let mask = &[0b00010010, 0b00100011];
387-
388-
let result: Vec<u32> = BitIndexU32Iterator::new(mask, 0, 16).collect();
389-
let expected: Vec<u32> = BitIndexIterator::new(mask, 0, 16)
390-
.map(|i| i as u32)
391-
.collect();
392-
assert_eq!(result, expected);
393-
394-
let result: Vec<u32> = BitIndexU32Iterator::new(mask, 4, 8).collect();
395-
let expected: Vec<u32> = BitIndexIterator::new(mask, 4, 8)
396-
.map(|i| i as u32)
397-
.collect();
398-
assert_eq!(result, expected);
399-
400-
let result: Vec<u32> = BitIndexU32Iterator::new(mask, 10, 4).collect();
401-
let expected: Vec<u32> = BitIndexIterator::new(mask, 10, 4)
402-
.map(|i| i as u32)
403-
.collect();
404-
assert_eq!(result, expected);
405-
406-
let result: Vec<u32> = BitIndexU32Iterator::new(mask, 0, 0).collect();
407-
let expected: Vec<u32> = BitIndexIterator::new(mask, 0, 0)
408-
.map(|i| i as u32)
409-
.collect();
410-
assert_eq!(result, expected);
411-
}
412-
413-
#[test]
414-
fn test_bit_index_u32_iterator_all_set() {
415-
let mask = &[0xFF, 0xFF];
416-
let result: Vec<u32> = BitIndexU32Iterator::new(mask, 0, 16).collect();
417-
let expected: Vec<u32> = BitIndexIterator::new(mask, 0, 16)
418-
.map(|i| i as u32)
419-
.collect();
420-
assert_eq!(result, expected);
421-
}
422-
423-
#[test]
424-
fn test_bit_index_u32_iterator_none_set() {
425-
let mask = &[0x00, 0x00];
426-
let result: Vec<u32> = BitIndexU32Iterator::new(mask, 0, 16).collect();
427-
let expected: Vec<u32> = BitIndexIterator::new(mask, 0, 16)
428-
.map(|i| i as u32)
429-
.collect();
430-
assert_eq!(result, expected);
431-
}
432-
433-
#[test]
434-
fn test_bit_index_u32_cross_chunk() {
435-
let mut buf = vec![0u8; 16];
436-
for bit in 60..68 {
437-
let byte = (bit / 8) as usize;
438-
let bit_in_byte = bit % 8;
439-
buf[byte] |= 1 << bit_in_byte;
440-
}
441-
let offset = 58;
442-
let len = 10;
443-
444-
let result: Vec<u32> = BitIndexU32Iterator::new(&buf, offset, len).collect();
445-
let expected: Vec<u32> = BitIndexIterator::new(&buf, offset, len)
446-
.map(|i| i as u32)
447-
.collect();
448-
assert_eq!(result, expected);
449-
}
450-
451-
#[test]
452-
fn test_bit_index_u32_unaligned_offset() {
453-
let mask = &[0b0110_1100, 0b1010_0000];
454-
let offset = 2;
455-
let len = 12;
456-
457-
let result: Vec<u32> = BitIndexU32Iterator::new(mask, offset, len).collect();
458-
let expected: Vec<u32> = BitIndexIterator::new(mask, offset, len)
459-
.map(|i| i as u32)
460-
.collect();
461-
assert_eq!(result, expected);
462-
}
463-
464-
#[test]
465-
fn test_bit_index_u32_long_all_set() {
466-
let len = 200;
467-
let num_bytes = len / 8 + if len % 8 != 0 { 1 } else { 0 };
468-
let bytes = vec![0xFFu8; num_bytes];
469-
470-
let result: Vec<u32> = BitIndexU32Iterator::new(&bytes, 0, len).collect();
471-
let expected: Vec<u32> = BitIndexIterator::new(&bytes, 0, len)
472-
.map(|i| i as u32)
473-
.collect();
474-
assert_eq!(result, expected);
475-
}
476-
477-
#[test]
478-
fn test_bit_index_u32_none_set() {
479-
let len = 50;
480-
let num_bytes = len / 8 + if len % 8 != 0 { 1 } else { 0 };
481-
let bytes = vec![0u8; num_bytes];
482-
483-
let result: Vec<u32> = BitIndexU32Iterator::new(&bytes, 0, len).collect();
484-
let expected: Vec<u32> = BitIndexIterator::new(&bytes, 0, len)
485-
.map(|i| i as u32)
486-
.collect();
487-
assert_eq!(result, expected);
488-
}
489326
}

arrow-ord/src/sort.rs

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -219,15 +219,15 @@ fn partition_validity_scan(
219219
unsafe {
220220
// 1) Write valid indices (bits == 1)
221221
let valid_slice = valid.spare_capacity_mut();
222-
for (i, idx) in bitmap.inner().set_indices_u32().enumerate() {
223-
valid_slice[i].write(idx);
222+
for (i, idx) in bitmap.inner().set_indices().enumerate() {
223+
valid_slice[i].write(idx as u32);
224224
}
225225

226226
// 2) Write null indices by inverting
227227
let inv_buf = !bitmap.inner();
228228
let null_slice = nulls.spare_capacity_mut();
229-
for (i, idx) in inv_buf.set_indices_u32().enumerate() {
230-
null_slice[i].write(idx);
229+
for (i, idx) in inv_buf.set_indices().enumerate() {
230+
null_slice[i].write(idx as u32);
231231
}
232232

233233
// Finalize lengths

0 commit comments

Comments
 (0)