Skip to content

Commit eddea60

Browse files
authored
Prefetch reader (#18)
1 parent d040726 commit eddea60

File tree

4 files changed

+59
-5
lines changed

4 files changed

+59
-5
lines changed

python/src/tiff.rs

Lines changed: 14 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
use async_tiff::{COGReader, ObjectReader};
1+
use async_tiff::{AsyncFileReader, COGReader, ObjectReader, PrefetchReader};
22
use pyo3::prelude::*;
33
use pyo3::types::PyType;
44
use pyo3_async_runtimes::tokio::future_into_py;
@@ -12,16 +12,27 @@ pub(crate) struct PyTIFF(COGReader);
1212
#[pymethods]
1313
impl PyTIFF {
1414
#[classmethod]
15-
#[pyo3(signature = (path, *, store))]
15+
#[pyo3(signature = (path, *, store, prefetch=16384))]
1616
fn open<'py>(
1717
_cls: &'py Bound<PyType>,
1818
py: Python<'py>,
1919
path: String,
2020
store: PyObjectStore,
21+
prefetch: Option<u64>,
2122
) -> PyResult<Bound<'py, PyAny>> {
2223
let reader = ObjectReader::new(store.into_inner(), path.into());
24+
2325
let cog_reader = future_into_py(py, async move {
24-
Ok(PyTIFF(COGReader::try_open(Box::new(reader)).await.unwrap()))
26+
let reader: Box<dyn AsyncFileReader> = if let Some(prefetch) = prefetch {
27+
Box::new(
28+
PrefetchReader::new(Box::new(reader), prefetch)
29+
.await
30+
.unwrap(),
31+
)
32+
} else {
33+
Box::new(reader)
34+
};
35+
Ok(PyTIFF(COGReader::try_open(reader).await.unwrap()))
2536
})?;
2637
Ok(cog_reader)
2738
}

python/tests/test_cog.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,11 +6,16 @@
66
path = "sentinel-s2-l2a-cogs/12/S/UF/2022/6/S2B_12SUF_20220609_0_L2A/B04.tif"
77

88
# 2 min, 15s
9-
tiff = await TIFF.open(path, store=store)
9+
tiff = await TIFF.open(path, store=store, prefetch=32768)
1010
ifds = tiff.ifds()
1111
ifd = ifds[0]
12+
ifd.compression
1213
ifd.tile_height
1314
ifd.tile_width
1415
ifd.photometric_interpretation
1516
gkd = ifd.geo_key_directory
1617
gkd.citation
18+
gkd.projected_type
19+
gkd.citation
20+
21+
dir(gkd)

src/async_reader.rs

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -114,6 +114,44 @@ impl AsyncFileReader for ObjectReader {
114114
}
115115
}
116116

117+
pub struct PrefetchReader {
118+
reader: Box<dyn AsyncFileReader>,
119+
buffer: Bytes,
120+
}
121+
122+
impl PrefetchReader {
123+
pub async fn new(mut reader: Box<dyn AsyncFileReader>, prefetch: u64) -> Result<Self> {
124+
let buffer = reader.get_bytes(0..prefetch).await?;
125+
Ok(Self { reader, buffer })
126+
}
127+
}
128+
129+
impl AsyncFileReader for PrefetchReader {
130+
fn get_bytes(&mut self, range: Range<u64>) -> BoxFuture<'_, Result<Bytes>> {
131+
if range.start < self.buffer.len() as _ {
132+
if range.end < self.buffer.len() as _ {
133+
let usize_range = range.start as usize..range.end as usize;
134+
let result = self.buffer.slice(usize_range);
135+
async { Ok(result) }.boxed()
136+
} else {
137+
// TODO: reuse partial internal buffer
138+
self.reader.get_bytes(range)
139+
}
140+
} else {
141+
self.reader.get_bytes(range)
142+
}
143+
}
144+
145+
fn get_byte_ranges(&mut self, ranges: Vec<Range<u64>>) -> BoxFuture<'_, Result<Vec<Bytes>>>
146+
where
147+
Self: Send,
148+
{
149+
// In practice, get_byte_ranges is only used for fetching tiles, which are unlikely to
150+
// overlap a metadata prefetch.
151+
self.reader.get_byte_ranges(ranges)
152+
}
153+
}
154+
117155
#[derive(Debug, Clone, Copy, Default)]
118156
pub enum Endianness {
119157
#[default]

src/lib.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,6 @@ pub mod error;
77
pub mod geo;
88
mod ifd;
99

10-
pub use async_reader::{AsyncFileReader, ObjectReader};
10+
pub use async_reader::{AsyncFileReader, ObjectReader, PrefetchReader};
1111
pub use cog::COGReader;
1212
pub use ifd::{ImageFileDirectories, ImageFileDirectory};

0 commit comments

Comments
 (0)