Skip to content

Commit 4f387e1

Browse files
authored
Split traits to get image bytes and metadata bytes (#79)
* Split traits to get image bytes and metadata bytes * fix python compile * Update docs
1 parent 8f03ceb commit 4f387e1

File tree

3 files changed

+103
-54
lines changed

3 files changed

+103
-54
lines changed

python/src/reader.rs

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -115,11 +115,15 @@ struct ObspecReader {
115115
}
116116

117117
impl AsyncFileReader for ObspecReader {
118-
fn get_bytes(&self, range: Range<u64>) -> BoxFuture<'_, AsyncTiffResult<Bytes>> {
118+
fn get_metadata_bytes(&self, range: Range<u64>) -> BoxFuture<'_, AsyncTiffResult<Bytes>> {
119119
self.backend.get_range_wrapper(&self.path, range).boxed()
120120
}
121121

122-
fn get_byte_ranges(
122+
fn get_image_bytes(&self, range: Range<u64>) -> BoxFuture<'_, AsyncTiffResult<Bytes>> {
123+
self.backend.get_range_wrapper(&self.path, range).boxed()
124+
}
125+
126+
fn get_image_byte_ranges(
123127
&self,
124128
ranges: Vec<Range<u64>>,
125129
) -> BoxFuture<'_, AsyncTiffResult<Vec<Bytes>>> {

src/ifd.rs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -779,7 +779,7 @@ impl ImageFileDirectory {
779779
let range = self
780780
.get_tile_byte_range(x, y)
781781
.ok_or(AsyncTiffError::General("Not a tiled TIFF".to_string()))?;
782-
let compressed_bytes = reader.get_bytes(range).await?;
782+
let compressed_bytes = reader.get_image_bytes(range).await?;
783783
Ok(Tile {
784784
x,
785785
y,
@@ -809,8 +809,8 @@ impl ImageFileDirectory {
809809
})
810810
.collect::<AsyncTiffResult<Vec<_>>>()?;
811811

812-
// 2: Fetch using `get_ranges
813-
let buffers = reader.get_byte_ranges(byte_ranges).await?;
812+
// 2: Fetch using `get_image_byte_ranges`
813+
let buffers = reader.get_image_byte_ranges(byte_ranges).await?;
814814

815815
// 3: Create tile objects
816816
let mut tiles = vec![];

src/reader.rs

Lines changed: 94 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -20,30 +20,38 @@ use crate::error::{AsyncTiffError, AsyncTiffResult};
2020
///
2121
/// Notes:
2222
///
23-
/// 1. There is a default implementation for types that implement [`tokio::io::AsyncRead`]
24-
/// and [`tokio::io::AsyncSeek`], for example [`tokio::fs::File`].
23+
/// 1. There are distinct methods for accessing "metadata bytes" and "image bytes". The requests for
24+
/// "metadata bytes" from `get_metadata_bytes` will be called from `TIFF.open`, while parsing
25+
/// IFDs. Requests for "image bytes" from `get_image_bytes` and `get_image_byte_ranges` will be
26+
/// called while fetching data from TIFF tiles or strips.
2527
///
2628
/// 2. [`ObjectReader`], available when the `object_store` crate feature
2729
/// is enabled, implements this interface for [`ObjectStore`].
2830
///
31+
/// 3. You can use [`TokioReader`] to implement [`AsyncFileReader`] for types that implement
32+
/// [`tokio::io::AsyncRead`] and [`tokio::io::AsyncSeek`], for example [`tokio::fs::File`].
33+
///
2934
/// [`ObjectStore`]: object_store::ObjectStore
3035
///
3136
/// [`tokio::fs::File`]: https://docs.rs/tokio/latest/tokio/fs/struct.File.html
3237
pub trait AsyncFileReader: Debug + Send + Sync {
33-
/// Retrieve the bytes in `range`
34-
fn get_bytes(&self, range: Range<u64>) -> BoxFuture<'_, AsyncTiffResult<Bytes>>;
38+
/// Retrieve the bytes in `range` as part of a request for header metadata.
39+
fn get_metadata_bytes(&self, range: Range<u64>) -> BoxFuture<'_, AsyncTiffResult<Bytes>>;
40+
41+
/// Retrieve the bytes in `range` as part of a request for image data, not header metadata.
42+
fn get_image_bytes(&self, range: Range<u64>) -> BoxFuture<'_, AsyncTiffResult<Bytes>>;
3543

36-
/// Retrieve multiple byte ranges. The default implementation will call `get_bytes`
37-
/// sequentially
38-
fn get_byte_ranges(
44+
/// Retrieve multiple byte ranges as part of a request for image data, not header metadata. The
45+
/// default implementation will call `get_image_bytes` sequentially.
46+
fn get_image_byte_ranges(
3947
&self,
4048
ranges: Vec<Range<u64>>,
4149
) -> BoxFuture<'_, AsyncTiffResult<Vec<Bytes>>> {
4250
async move {
4351
let mut result = Vec::with_capacity(ranges.len());
4452

4553
for range in ranges.into_iter() {
46-
let data = self.get_bytes(range).await?;
54+
let data = self.get_image_bytes(range).await?;
4755
result.push(data);
4856
}
4957

@@ -55,15 +63,19 @@ pub trait AsyncFileReader: Debug + Send + Sync {
5563

5664
/// This allows `Box<dyn AsyncFileReader + '_>` to be used as an `AsyncFileReader`.
5765
impl AsyncFileReader for Box<dyn AsyncFileReader + '_> {
58-
fn get_bytes(&self, range: Range<u64>) -> BoxFuture<'_, AsyncTiffResult<Bytes>> {
59-
self.as_ref().get_bytes(range)
66+
fn get_metadata_bytes(&self, range: Range<u64>) -> BoxFuture<'_, AsyncTiffResult<Bytes>> {
67+
self.as_ref().get_metadata_bytes(range)
68+
}
69+
70+
fn get_image_bytes(&self, range: Range<u64>) -> BoxFuture<'_, AsyncTiffResult<Bytes>> {
71+
self.as_ref().get_image_bytes(range)
6072
}
6173

62-
fn get_byte_ranges(
74+
fn get_image_byte_ranges(
6375
&self,
6476
ranges: Vec<Range<u64>>,
6577
) -> BoxFuture<'_, AsyncTiffResult<Vec<Bytes>>> {
66-
self.as_ref().get_byte_ranges(ranges)
78+
self.as_ref().get_image_byte_ranges(ranges)
6779
}
6880
}
6981

@@ -89,31 +101,36 @@ impl<T: tokio::io::AsyncRead + tokio::io::AsyncSeek + Unpin + Send + Debug> Toki
89101
pub fn new(inner: T) -> Self {
90102
Self(tokio::sync::Mutex::new(inner))
91103
}
92-
}
93104

94-
#[cfg(feature = "tokio")]
95-
impl<T: tokio::io::AsyncRead + tokio::io::AsyncSeek + Unpin + Send + Debug> AsyncFileReader
96-
for TokioReader<T>
97-
{
98-
fn get_bytes(&self, range: Range<u64>) -> BoxFuture<'_, AsyncTiffResult<Bytes>> {
105+
async fn make_range_request(&self, range: Range<u64>) -> AsyncTiffResult<Bytes> {
99106
use std::io::SeekFrom;
100107
use tokio::io::{AsyncReadExt, AsyncSeekExt};
101108

102-
async move {
103-
let mut file = self.0.lock().await;
104-
105-
file.seek(SeekFrom::Start(range.start)).await?;
109+
let mut file = self.0.lock().await;
106110

107-
let to_read = range.end - range.start;
108-
let mut buffer = Vec::with_capacity(to_read as usize);
109-
let read = file.read(&mut buffer).await? as u64;
110-
if read != to_read {
111-
return Err(AsyncTiffError::EndOfFile(to_read, read));
112-
}
111+
file.seek(SeekFrom::Start(range.start)).await?;
113112

114-
Ok(buffer.into())
113+
let to_read = range.end - range.start;
114+
let mut buffer = Vec::with_capacity(to_read as usize);
115+
let read = file.read(&mut buffer).await? as u64;
116+
if read != to_read {
117+
return Err(AsyncTiffError::EndOfFile(to_read, read));
115118
}
116-
.boxed()
119+
120+
Ok(buffer.into())
121+
}
122+
}
123+
124+
#[cfg(feature = "tokio")]
125+
impl<T: tokio::io::AsyncRead + tokio::io::AsyncSeek + Unpin + Send + Debug> AsyncFileReader
126+
for TokioReader<T>
127+
{
128+
fn get_metadata_bytes(&self, range: Range<u64>) -> BoxFuture<'_, AsyncTiffResult<Bytes>> {
129+
self.make_range_request(range).boxed()
130+
}
131+
132+
fn get_image_bytes(&self, range: Range<u64>) -> BoxFuture<'_, AsyncTiffResult<Bytes>> {
133+
self.make_range_request(range).boxed()
117134
}
118135
}
119136

@@ -133,19 +150,30 @@ impl ObjectReader {
133150
pub fn new(store: Arc<dyn object_store::ObjectStore>, path: object_store::path::Path) -> Self {
134151
Self { store, path }
135152
}
136-
}
137153

138-
#[cfg(feature = "object_store")]
139-
impl AsyncFileReader for ObjectReader {
140-
fn get_bytes(&self, range: Range<u64>) -> BoxFuture<'_, AsyncTiffResult<Bytes>> {
154+
async fn make_range_request(&self, range: Range<u64>) -> AsyncTiffResult<Bytes> {
141155
let range = range.start as _..range.end as _;
142156
self.store
143157
.get_range(&self.path, range)
144158
.map_err(|e| e.into())
145-
.boxed()
159+
.await
160+
}
161+
}
162+
163+
#[cfg(feature = "object_store")]
164+
impl AsyncFileReader for ObjectReader {
165+
fn get_metadata_bytes(&self, range: Range<u64>) -> BoxFuture<'_, AsyncTiffResult<Bytes>> {
166+
self.make_range_request(range).boxed()
146167
}
147168

148-
fn get_byte_ranges(&self, ranges: Vec<Range<u64>>) -> BoxFuture<'_, AsyncTiffResult<Vec<Bytes>>>
169+
fn get_image_bytes(&self, range: Range<u64>) -> BoxFuture<'_, AsyncTiffResult<Bytes>> {
170+
self.make_range_request(range).boxed()
171+
}
172+
173+
fn get_image_byte_ranges(
174+
&self,
175+
ranges: Vec<Range<u64>>,
176+
) -> BoxFuture<'_, AsyncTiffResult<Vec<Bytes>>>
149177
where
150178
Self: Send,
151179
{
@@ -177,11 +205,8 @@ impl ReqwestReader {
177205
pub fn new(client: reqwest::Client, url: reqwest::Url) -> Self {
178206
Self { client, url }
179207
}
180-
}
181208

182-
#[cfg(feature = "reqwest")]
183-
impl AsyncFileReader for ReqwestReader {
184-
fn get_bytes(&self, range: Range<u64>) -> BoxFuture<'_, AsyncTiffResult<Bytes>> {
209+
fn make_range_request(&self, range: Range<u64>) -> BoxFuture<'_, AsyncTiffResult<Bytes>> {
185210
let url = self.url.clone();
186211
let client = self.client.clone();
187212
// HTTP range is inclusive, so we need to subtract 1 from the end
@@ -200,6 +225,17 @@ impl AsyncFileReader for ReqwestReader {
200225
}
201226
}
202227

228+
#[cfg(feature = "reqwest")]
229+
impl AsyncFileReader for ReqwestReader {
230+
fn get_metadata_bytes(&self, range: Range<u64>) -> BoxFuture<'_, AsyncTiffResult<Bytes>> {
231+
self.make_range_request(range)
232+
}
233+
234+
fn get_image_bytes(&self, range: Range<u64>) -> BoxFuture<'_, AsyncTiffResult<Bytes>> {
235+
self.make_range_request(range)
236+
}
237+
}
238+
203239
/// An AsyncFileReader that caches the first `prefetch` bytes of a file.
204240
#[derive(Debug)]
205241
pub struct PrefetchReader {
@@ -210,34 +246,43 @@ pub struct PrefetchReader {
210246
impl PrefetchReader {
211247
/// Construct a new PrefetchReader, caching the first `prefetch` bytes of the file.
212248
pub async fn new(reader: Arc<dyn AsyncFileReader>, prefetch: u64) -> AsyncTiffResult<Self> {
213-
let buffer = reader.get_bytes(0..prefetch).await?;
249+
let buffer = reader.get_metadata_bytes(0..prefetch).await?;
214250
Ok(Self { reader, buffer })
215251
}
216252
}
217253

218254
impl AsyncFileReader for PrefetchReader {
219-
fn get_bytes(&self, range: Range<u64>) -> BoxFuture<'_, AsyncTiffResult<Bytes>> {
255+
fn get_metadata_bytes(&self, range: Range<u64>) -> BoxFuture<'_, AsyncTiffResult<Bytes>> {
220256
if range.start < self.buffer.len() as _ {
221257
if range.end < self.buffer.len() as _ {
222258
let usize_range = range.start as usize..range.end as usize;
223259
let result = self.buffer.slice(usize_range);
224260
async { Ok(result) }.boxed()
225261
} else {
226262
// TODO: reuse partial internal buffer
227-
self.reader.get_bytes(range)
263+
self.reader.get_metadata_bytes(range)
228264
}
229265
} else {
230-
self.reader.get_bytes(range)
266+
self.reader.get_metadata_bytes(range)
231267
}
232268
}
233269

234-
fn get_byte_ranges(&self, ranges: Vec<Range<u64>>) -> BoxFuture<'_, AsyncTiffResult<Vec<Bytes>>>
270+
fn get_image_bytes(&self, range: Range<u64>) -> BoxFuture<'_, AsyncTiffResult<Bytes>> {
271+
// In practice, get_image_bytes is only used for fetching tiles, which are unlikely
272+
// to overlap a metadata prefetch.
273+
self.reader.get_image_bytes(range)
274+
}
275+
276+
fn get_image_byte_ranges(
277+
&self,
278+
ranges: Vec<Range<u64>>,
279+
) -> BoxFuture<'_, AsyncTiffResult<Vec<Bytes>>>
235280
where
236281
Self: Send,
237282
{
238-
// In practice, get_byte_ranges is only used for fetching tiles, which are unlikely to
239-
// overlap a metadata prefetch.
240-
self.reader.get_byte_ranges(ranges)
283+
// In practice, get_image_byte_ranges is only used for fetching tiles, which are unlikely
284+
// to overlap a metadata prefetch.
285+
self.reader.get_image_byte_ranges(ranges)
241286
}
242287
}
243288

@@ -298,7 +343,7 @@ impl AsyncCursor {
298343
pub(crate) async fn read(&mut self, length: u64) -> AsyncTiffResult<EndianAwareReader> {
299344
let range = self.offset as _..(self.offset + length) as _;
300345
self.offset += length;
301-
let bytes = self.reader.get_bytes(range).await?;
346+
let bytes = self.reader.get_metadata_bytes(range).await?;
302347
Ok(EndianAwareReader {
303348
reader: bytes.reader(),
304349
endianness: self.endianness,

0 commit comments

Comments
 (0)