Skip to content

Commit 1d60252

Browse files
authored
Merge pull request #214 from mulimoen/feature/chunks-iter
Chunks iterator
2 parents 12b4067 + e3ffa48 commit 1d60252

File tree

5 files changed

+201
-58
lines changed

5 files changed

+201
-58
lines changed

CHANGELOG.md

+2
Original file line numberDiff line numberDiff line change
@@ -9,11 +9,13 @@
99
- Support field renaming via `#[hdf5(rename = "new_name")]` helper attribute.
1010
- Add a `ByteReader` which implements `std::io::{Read, Seek}` for 1D `u8`
1111
datasets. Usage via `Dataset::as_byte_reader()`.
12+
- Add `chunks_visit` to visit all chunks in a dataset.
1213

1314
### Changed
1415

1516
- The `H5Type` derive macro now uses `proc-macro-error` to emit error messages.
1617
- MSRV is now `1.64.0` and Rust edition has now been bumped to 2021.
18+
- Types in `ChunkInfo` have been changed to match HDF5.
1719

1820
### Fixed
1921

hdf5/src/hl.rs

+1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
pub mod attribute;
2+
pub mod chunks;
23
pub mod container;
34
pub mod dataset;
45
pub mod dataspace;

hdf5/src/hl/chunks.rs

+183
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,183 @@
1+
use crate::internal_prelude::*;
2+
3+
#[cfg(feature = "1.10.5")]
4+
use hdf5_sys::h5d::{H5Dget_chunk_info, H5Dget_num_chunks};
5+
6+
#[cfg(feature = "1.10.5")]
7+
#[derive(Clone, Debug, PartialEq, Eq)]
8+
/// Owned description of a single chunk in a dataset: its logical offset,
/// filter mask, address in the file and size in bytes.
9+
pub struct ChunkInfo {
10+
/// Array with a size equal to the dataset’s rank whose elements contain 0-based
11+
/// logical positions of the chunk’s first element in each dimension.
12+
pub offset: Vec<hsize_t>,
13+
/// Filter mask that indicates which filters were used with the chunk when written.
14+
///
15+
/// A zero value indicates that all enabled filters are applied on the chunk.
16+
/// A filter is skipped if the bit corresponding to the filter’s position in
17+
/// the pipeline (0 ≤ position < 32) is turned on.
18+
pub filter_mask: u32,
19+
/// Chunk address in the file.
20+
pub addr: haddr_t,
21+
/// Chunk size in bytes.
22+
pub size: hsize_t,
23+
}
24+
25+
#[cfg(feature = "1.10.5")]
26+
impl ChunkInfo {
27+
/// Creates a `ChunkInfo` with every numeric field set to zero and an
/// `offset` vector holding `ndim` zeroes.
pub(crate) fn new(ndim: usize) -> Self {
28+
let offset = vec![0; ndim];
29+
Self { offset, filter_mask: 0, addr: 0, size: 0 }
30+
}
31+
32+
/// Returns positional indices of disabled filters.
///
/// These are the bit positions in `0..32` that are set in `filter_mask`,
/// listed in increasing order.
33+
pub fn disabled_filters(&self) -> Vec<usize> {
34+
(0..32).filter(|i| self.filter_mask & (1 << i) != 0).collect()
35+
}
36+
}
37+
38+
#[cfg(feature = "1.10.5")]
39+
/// Retrieves information about the chunk identified by `index` using
/// `H5Dget_chunk_info`.
///
/// Returns `None` if the dataset is not chunked, if its dataspace cannot be
/// obtained, or if the HDF5 call reports an error.
pub(crate) fn chunk_info(ds: &Dataset, index: usize) -> Option<ChunkInfo> {
40+
if !ds.is_chunked() {
41+
return None;
42+
}
43+
h5lock!(ds.space().map_or(None, |s| {
44+
// The offset buffer is sized to the dataset rank before its pointer is
// handed to HDF5 as an output argument.
let mut chunk_info = ChunkInfo::new(ds.ndim());
45+
h5check(H5Dget_chunk_info(
46+
ds.id(),
47+
s.id(),
48+
index as _,
49+
chunk_info.offset.as_mut_ptr(),
50+
&mut chunk_info.filter_mask,
51+
&mut chunk_info.addr,
52+
&mut chunk_info.size,
53+
))
54+
// On success hand back the struct HDF5 filled in; errors become `None`.
.map(|_| chunk_info)
55+
.ok()
56+
}))
57+
}
58+
59+
#[cfg(feature = "1.10.5")]
60+
/// Returns the chunk count reported by `H5Dget_num_chunks` for `ds`.
///
/// Returns `None` if the dataset is not chunked, if its dataspace cannot be
/// obtained, or if the HDF5 call reports an error.
pub(crate) fn get_num_chunks(ds: &Dataset) -> Option<usize> {
61+
if !ds.is_chunked() {
62+
return None;
63+
}
64+
h5lock!(ds.space().map_or(None, |s| {
65+
// Output argument written by HDF5; converted to `usize` on success.
let mut n: hsize_t = 0;
66+
h5check(H5Dget_num_chunks(ds.id(), s.id(), &mut n)).map(|_| n as _).ok()
67+
}))
68+
}
69+
70+
#[cfg(feature = "1.14.0")]
71+
mod v1_14_0 {
72+
use super::*;
73+
use hdf5_sys::h5d::H5Dchunk_iter;
74+
75+
/// Borrowed version of [ChunkInfo](crate::dataset::ChunkInfo)
///
/// The fields mirror those of `ChunkInfo`; only `offset` is a borrowed slice
/// instead of an owned vector.
76+
#[derive(Clone, Debug, PartialEq, Eq)]
77+
pub struct ChunkInfoRef<'a> {
78+
/// Borrowed chunk offset (counterpart of `ChunkInfo::offset`).
pub offset: &'a [hsize_t],
79+
/// Filter mask of the chunk (counterpart of `ChunkInfo::filter_mask`).
pub filter_mask: u32,
80+
/// Chunk address (counterpart of `ChunkInfo::addr`).
pub addr: haddr_t,
81+
/// Chunk size (counterpart of `ChunkInfo::size`).
pub size: hsize_t,
82+
}
83+
84+
impl<'a> ChunkInfoRef<'a> {
85+
/// Returns positional indices of disabled filters.
///
/// These are the bit positions in `0..32` that are set in `filter_mask`,
/// listed in increasing order.
86+
pub fn disabled_filters(&self) -> Vec<usize> {
87+
(0..32).filter(|i| self.filter_mask & (1 << i) != 0).collect()
88+
}
89+
}
90+
91+
/// Converts the borrowed view into an owned `ChunkInfo`.
impl<'a> From<ChunkInfoRef<'a>> for ChunkInfo {
92+
/// Copies the offset slice into a new `Vec` and carries the remaining
/// fields over unchanged.
fn from(val: ChunkInfoRef<'a>) -> Self {
93+
Self {
94+
offset: val.offset.to_owned(),
95+
filter_mask: val.filter_mask,
96+
addr: val.addr,
97+
size: val.size,
98+
}
99+
}
100+
}
101+
102+
/// State passed through the opaque `op_data` pointer of `H5Dchunk_iter` to
/// `chunks_callback`.
#[repr(C)]
103+
struct RustCallback<F> {
104+
/// Number of elements `chunks_callback` reads from each `offset` pointer;
/// `visit` sets it from the dataset's `ndim()`.
pub ndims: hsize_t,
105+
/// The user-supplied closure invoked for every chunk.
pub callback: F,
106+
}
107+
108+
/// C-ABI trampoline handed to `H5Dchunk_iter`.
///
/// Reinterprets `op_data` as a `RustCallback<F>`, wraps the raw chunk
/// description in a `ChunkInfoRef` and returns whatever the stored closure
/// returns. A panic raised inside is caught and reported as `-1` instead of
/// unwinding out of the `extern "C"` function.
extern "C" fn chunks_callback<F>(
109+
offset: *const hsize_t, filter_mask: c_uint, addr: haddr_t, size: hsize_t,
110+
op_data: *mut c_void,
111+
) -> herr_t
112+
where
113+
F: FnMut(ChunkInfoRef) -> i32,
114+
{
115+
// NOTE(review): soundness relies on `op_data` pointing to the live
// `RustCallback<F>` that `visit` passes to `H5Dchunk_iter`, and on `offset`
// pointing to at least `ndims` readable elements. The latter is assumed
// from how HDF5 invokes the callback; confirm against the HDF5 docs.
unsafe {
116+
std::panic::catch_unwind(|| {
117+
let data: *mut RustCallback<F> = op_data.cast::<RustCallback<F>>();
118+
let ndims = (*data).ndims;
119+
let callback = &mut (*data).callback;
120+
121+
// Borrow the offsets in place; nothing is copied for the callback.
let offset = std::slice::from_raw_parts(offset, ndims as usize);
122+
123+
let info = ChunkInfoRef { offset, filter_mask, addr, size };
124+
125+
callback(info)
126+
})
127+
// A caught panic is turned into the error code -1.
.unwrap_or(-1)
128+
}
129+
}
130+
131+
/// Iterates over the chunks of `ds` with `H5Dchunk_iter`, calling `callback`
/// with a borrowed `ChunkInfoRef` for each chunk visited.
///
/// The closure's `i32` return value is passed back to HDF5 unchanged.
/// NOTE(review): presumably `0` means "continue", as the test in this module
/// returns `0` and sees every chunk; confirm the other values against the
/// `H5Dchunk_iter` documentation.
///
/// # Errors
///
/// Returns an error if `H5Dchunk_iter` fails.
pub(crate) fn visit<F>(ds: &Dataset, callback: F) -> Result<()>
132+
where
133+
F: for<'a> FnMut(ChunkInfoRef<'a>) -> i32,
134+
{
135+
// `data` is a local of this function; its address is handed to HDF5 as
// the opaque `op_data` pointer and read back in `chunks_callback::<F>`.
let mut data = RustCallback::<F> { ndims: ds.ndim() as _, callback };
136+
137+
h5try!(H5Dchunk_iter(
138+
ds.id(),
139+
H5P_DEFAULT,
140+
Some(chunks_callback::<F>),
141+
std::ptr::addr_of_mut!(data).cast()
142+
));
143+
144+
Ok(())
145+
}
146+
147+
#[cfg(test)]
148+
mod test {
149+
use super::*;
150+
151+
/// Checks that `visit` fails on a dataset created with `no_chunk()` and
/// that it reports all six chunks, in the expected order, for a 3x2
/// dataset with 1x1 chunks.
#[test]
152+
fn chunks_visit() {
153+
with_tmp_file(|f| {
154+
let ds = f.new_dataset::<i16>().no_chunk().shape((4, 4)).create("nochunk").unwrap();
155+
// Visiting a dataset without chunks must produce an error matching this text.
assert_err_re!(visit(&ds, |_| 0), "not a chunked dataset");
156+
157+
let ds =
158+
f.new_dataset::<i16>().shape([3, 2]).chunk([1, 1]).create("chunk").unwrap();
159+
ds.write(&ndarray::arr2(&[[1, 2], [3, 4], [5, 6]])).unwrap();
160+
161+
// Counts callback invocations and selects the expected offset.
let mut i = 0;
162+
// Note: this closure shadows the file handle `f` from here on.
let f = |c: ChunkInfoRef| {
163+
// Offsets are expected with the last dimension varying fastest.
match i {
164+
0 => assert_eq!(c.offset, [0, 0]),
165+
1 => assert_eq!(c.offset, [0, 1]),
166+
2 => assert_eq!(c.offset, [1, 0]),
167+
3 => assert_eq!(c.offset, [1, 1]),
168+
4 => assert_eq!(c.offset, [2, 0]),
169+
5 => assert_eq!(c.offset, [2, 1]),
170+
_ => unreachable!(),
171+
}
172+
// Each 1x1 chunk is expected to occupy exactly one `i16`.
assert_eq!(c.size, std::mem::size_of::<i16>() as u64);
173+
i += 1;
174+
0
175+
};
176+
visit(&ds, f).unwrap();
177+
assert_eq!(i, 6);
178+
})
179+
}
180+
}
181+
}
182+
#[cfg(feature = "1.14.0")]
183+
pub use v1_14_0::*;

hdf5/src/hl/dataset.rs

+12-57
Original file line numberDiff line numberDiff line change
@@ -8,8 +8,6 @@ use hdf5_sys::h5d::{
88
H5Dcreate2, H5Dcreate_anon, H5Dget_access_plist, H5Dget_create_plist, H5Dget_offset,
99
H5Dset_extent,
1010
};
11-
#[cfg(feature = "1.10.5")]
12-
use hdf5_sys::h5d::{H5Dget_chunk_info, H5Dget_num_chunks};
1311
use hdf5_sys::h5l::H5Ldelete;
1412
use hdf5_sys::h5p::H5P_DEFAULT;
1513
use hdf5_sys::h5z::H5Z_filter_t;
@@ -66,36 +64,6 @@ impl Deref for Dataset {
6664
}
6765
}
6866

69-
#[cfg(feature = "1.10.5")]
70-
#[derive(Clone, Debug, PartialEq, Eq)]
71-
pub struct ChunkInfo {
72-
/// Array with a size equal to the dataset’s rank whose elements contain 0-based
73-
/// logical positions of the chunk’s first element in each dimension.
74-
pub offset: Vec<u64>,
75-
/// Filter mask that indicates which filters were used with the chunk when written.
76-
/// A zero value indicates that all enabled filters are applied on the chunk.
77-
/// A filter is skipped if the bit corresponding to the filter’s position in
78-
/// the pipeline (0 ≤ position < 32) is turned on.
79-
pub filter_mask: u32,
80-
/// Chunk address in the file.
81-
pub addr: u64,
82-
/// Chunk size in bytes.
83-
pub size: u64,
84-
}
85-
86-
#[cfg(feature = "1.10.5")]
87-
impl ChunkInfo {
88-
pub(crate) fn new(ndim: usize) -> Self {
89-
let offset = vec![0; ndim];
90-
Self { offset, filter_mask: 0, addr: 0, size: 0 }
91-
}
92-
93-
/// Returns positional indices of disabled filters.
94-
pub fn disabled_filters(&self) -> Vec<usize> {
95-
(0..32).filter(|i| self.filter_mask & (1 << i) != 0).collect()
96-
}
97-
}
98-
9967
impl Dataset {
10068
/// Returns a copy of the dataset access property list.
10169
pub fn access_plist(&self) -> Result<DatasetAccess> {
@@ -135,42 +103,29 @@ impl Dataset {
135103
#[cfg(feature = "1.10.5")]
136104
/// Returns the number of chunks if the dataset is chunked.
137105
pub fn num_chunks(&self) -> Option<usize> {
138-
if !self.is_chunked() {
139-
return None;
140-
}
141-
h5lock!(self.space().map_or(None, |s| {
142-
let mut n: hsize_t = 0;
143-
h5check(H5Dget_num_chunks(self.id(), s.id(), &mut n)).map(|_| n as _).ok()
144-
}))
106+
crate::hl::chunks::get_num_chunks(self)
145107
}
146108

147109
#[cfg(feature = "1.10.5")]
148110
/// Retrieves the chunk information for the chunk specified by its index.
149-
pub fn chunk_info(&self, index: usize) -> Option<ChunkInfo> {
150-
if !self.is_chunked() {
151-
return None;
152-
}
153-
h5lock!(self.space().map_or(None, |s| {
154-
let mut chunk_info = ChunkInfo::new(self.ndim());
155-
h5check(H5Dget_chunk_info(
156-
self.id(),
157-
s.id(),
158-
index as _,
159-
chunk_info.offset.as_mut_ptr(),
160-
&mut chunk_info.filter_mask,
161-
&mut chunk_info.addr,
162-
&mut chunk_info.size,
163-
))
164-
.map(|_| chunk_info)
165-
.ok()
166-
}))
111+
pub fn chunk_info(&self, index: usize) -> Option<crate::dataset::ChunkInfo> {
112+
crate::hl::chunks::chunk_info(self, index)
167113
}
168114

169115
/// Returns the chunk shape if the dataset is chunked.
170116
pub fn chunk(&self) -> Option<Vec<Ix>> {
171117
self.dcpl().map_or(None, |pl| pl.chunk())
172118
}
173119

120+
/// Visits the chunks of this dataset, calling `callback` with a borrowed
/// `ChunkInfoRef` for each chunk.
///
/// This forwards to `crate::hl::chunks::visit`. The callback returns an
/// `i32` status (NOTE(review): presumably `0` continues the iteration;
/// confirm against the `H5Dchunk_iter` documentation).
///
/// # Errors
///
/// Returns an error if the underlying chunk iteration fails.
121+
#[cfg(feature = "1.14.0")]
122+
pub fn chunks_visit<F>(&self, callback: F) -> Result<()>
123+
where
124+
F: for<'a> FnMut(crate::dataset::ChunkInfoRef<'a>) -> i32,
125+
{
126+
crate::hl::chunks::visit(self, callback)
127+
}
128+
174129
/// Returns the absolute byte offset of the dataset in the file if such offset is defined
175130
/// (which is not the case for datasets that are chunked, compact or not allocated yet).
176131
pub fn offset(&self) -> Option<u64> {

hdf5/src/lib.rs

+3-1
Original file line numberDiff line numberDiff line change
@@ -78,7 +78,9 @@ mod export {
7878

7979
pub mod dataset {
8080
#[cfg(feature = "1.10.5")]
81-
pub use crate::hl::dataset::ChunkInfo;
81+
pub use crate::hl::chunks::ChunkInfo;
82+
#[cfg(feature = "1.14.0")]
83+
pub use crate::hl::chunks::ChunkInfoRef;
8284
pub use crate::hl::dataset::{Chunk, Dataset, DatasetBuilder};
8385
pub use crate::hl::plist::dataset_access::*;
8486
pub use crate::hl::plist::dataset_create::*;

0 commit comments

Comments
 (0)