From 2c22931580bbbb526e0d67d04eb95dfc07ed8434 Mon Sep 17 00:00:00 2001
From: ok-nick <nick.libraries@gmail.com>
Date: Mon, 8 Jul 2024 11:13:01 -0400
Subject: [PATCH 01/21] Foundation for ZIP support

---
 sdk/Cargo.toml                   |   4 +-
 sdk/src/asset_handlers/mod.rs    |   1 +
 sdk/src/asset_handlers/zip_io.rs | 233 +++++++++++++++++++++++++++++++
 sdk/src/builder.rs               |   4 +-
 sdk/src/jumbf_io.rs              |   7 +-
 5 files changed, 244 insertions(+), 5 deletions(-)
 create mode 100644 sdk/src/asset_handlers/zip_io.rs

diff --git a/sdk/Cargo.toml b/sdk/Cargo.toml
index 798c3a6ef..aed7018c3 100644
--- a/sdk/Cargo.toml
+++ b/sdk/Cargo.toml
@@ -121,11 +121,11 @@ sha2 = "0.10.2"
 tempfile = "3.10.1"
 thiserror = "1.0.61"
 treeline = "0.1.0"
-url = "2.2.2, <2.5.1"  # Can't use 2.5.1 or newer until new license is reviewed.
+url = "2.2.2, <2.5.1"                                                    # Can't use 2.5.1 or newer until new license is reviewed.
 uuid = { version = "1.3.1", features = ["serde", "v4", "wasm-bindgen"] }
 x509-parser = "0.15.1"
 x509-certificate = "0.19.0"
-zip = { version = "0.6.6", default-features = false }
+zip = { version = "2.1.3", default-features = false }
 
 [target.'cfg(not(target_arch = "wasm32"))'.dependencies]
 ureq = "2.4.0"
diff --git a/sdk/src/asset_handlers/mod.rs b/sdk/src/asset_handlers/mod.rs
index 8bc7bfba6..dc44b0523 100644
--- a/sdk/src/asset_handlers/mod.rs
+++ b/sdk/src/asset_handlers/mod.rs
@@ -19,6 +19,7 @@ pub mod png_io;
 pub mod riff_io;
 pub mod svg_io;
 pub mod tiff_io;
+pub mod zip_io;
 
 #[cfg(feature = "pdf")]
 pub(crate) mod pdf;
diff --git a/sdk/src/asset_handlers/zip_io.rs b/sdk/src/asset_handlers/zip_io.rs
new file mode 100644
index 000000000..a411cdea6
--- /dev/null
+++ b/sdk/src/asset_handlers/zip_io.rs
@@ -0,0 +1,233 @@
+use std::{
+    fs::{self, File},
+    io::{self, Read},
+    path::Path,
+};
+
+use tempfile::Builder;
+use zip::{result::ZipResult, write::SimpleFileOptions, CompressionMethod, ZipArchive, ZipWriter};
+
+use crate::{
+    asset_io::{
+        self, AssetIO, CAIReadWrapper, CAIReadWriteWrapper, CAIReader, CAIWriter,
+        HashObjectPositions,
+    },
+    error::Result,
+    CAIRead, CAIReadWrite, Error,
+};
+
+pub struct ZipIO {}
+
+impl CAIWriter for ZipIO {
+    fn write_cai(
+        &self,
+        input_stream: &mut dyn CAIRead,
+        output_stream: &mut dyn CAIReadWrite,
+        mut store_bytes: &[u8],
+    ) -> Result<()> {
+        let mut writer = self
+            .writer(input_stream, output_stream)
+            .map_err(|_| Error::EmbeddingError)?;
+
+        // TODO: what happens if the dir exists?
+        writer
+            .add_directory("META-INF", SimpleFileOptions::default())
+            .map_err(|_| Error::EmbeddingError)?;
+
+        writer
+            .start_file_from_path(
+                Path::new("META-INF/content_credential.c2pa"),
+                SimpleFileOptions::default().compression_method(CompressionMethod::Stored),
+            )
+            .map_err(|_| Error::EmbeddingError)?;
+        io::copy(&mut store_bytes, &mut writer)?;
+        writer.finish().map_err(|_| Error::EmbeddingError)?;
+
+        Ok(())
+    }
+
+    fn get_object_locations_from_stream(
+        &self,
+        _input_stream: &mut dyn CAIRead,
+    ) -> Result<Vec<HashObjectPositions>> {
+        // TODO: error?
+        Ok(Vec::new())
+    }
+
+    fn remove_cai_store_from_stream(
+        &self,
+        input_stream: &mut dyn CAIRead,
+        output_stream: &mut dyn CAIReadWrite,
+    ) -> Result<()> {
+        let mut writer = self
+            .writer(input_stream, output_stream)
+            .map_err(|_| Error::EmbeddingError)?;
+
+        writer
+            .start_file_from_path(
+                Path::new("META-INF/content_credential.c2pa"),
+                SimpleFileOptions::default(),
+            )
+            .map_err(|_| Error::EmbeddingError)?;
+        writer.abort_file().map_err(|_| Error::EmbeddingError)?;
+        writer.finish().map_err(|_| Error::EmbeddingError)?;
+
+        Ok(())
+    }
+}
+
+impl CAIReader for ZipIO {
+    fn read_cai(&self, asset_reader: &mut dyn CAIRead) -> Result<Vec<u8>> {
+        let mut reader = self
+            .reader(asset_reader)
+            .map_err(|_| Error::JumbfNotFound)?;
+
+        let index = reader
+            .index_for_path(Path::new("META-INF/content_credential.c2pa"))
+            .ok_or(Error::JumbfNotFound)?;
+        let mut file = reader.by_index(index).map_err(|_| Error::JumbfNotFound)?;
+
+        let mut bytes = Vec::new();
+        file.read_to_end(&mut bytes)?;
+
+        Ok(bytes)
+    }
+
+    fn read_xmp(&self, _asset_reader: &mut dyn CAIRead) -> Option<String> {
+        None
+    }
+}
+
+impl AssetIO for ZipIO {
+    fn new(_asset_type: &str) -> Self
+    where
+        Self: Sized,
+    {
+        ZipIO {}
+    }
+
+    fn get_handler(&self, asset_type: &str) -> Box<dyn AssetIO> {
+        Box::new(ZipIO::new(asset_type))
+    }
+
+    fn get_reader(&self) -> &dyn CAIReader {
+        self
+    }
+
+    fn get_writer(&self, asset_type: &str) -> Option<Box<dyn CAIWriter>> {
+        Some(Box::new(ZipIO::new(asset_type)))
+    }
+
+    fn read_cai_store(&self, asset_path: &Path) -> Result<Vec<u8>> {
+        let mut f = File::open(asset_path)?;
+        self.read_cai(&mut f)
+    }
+
+    fn save_cai_store(&self, asset_path: &Path, store_bytes: &[u8]) -> Result<()> {
+        let mut stream = fs::OpenOptions::new()
+            .read(true)
+            .open(asset_path)
+            .map_err(Error::IoError)?;
+
+        let mut temp_file = Builder::new()
+            .prefix("c2pa_temp")
+            .rand_bytes(5)
+            .tempfile()?;
+
+        self.write_cai(&mut stream, &mut temp_file, store_bytes)?;
+
+        asset_io::rename_or_move(temp_file, asset_path)
+    }
+
+    fn get_object_locations(&self, asset_path: &Path) -> Result<Vec<HashObjectPositions>> {
+        let mut f = std::fs::File::open(asset_path).map_err(|_err| Error::EmbeddingError)?;
+        self.get_object_locations_from_stream(&mut f)
+    }
+
+    fn remove_cai_store(&self, asset_path: &Path) -> Result<()> {
+        let mut stream = fs::OpenOptions::new()
+            .read(true)
+            .open(asset_path)
+            .map_err(Error::IoError)?;
+
+        let mut temp_file = Builder::new()
+            .prefix("c2pa_temp")
+            .rand_bytes(5)
+            .tempfile()?;
+
+        self.remove_cai_store_from_stream(&mut stream, &mut temp_file)?;
+
+        asset_io::rename_or_move(temp_file, asset_path)
+    }
+
+    fn supported_types(&self) -> &[&str] {
+        &[
+            // Zip
+            "zip",
+            "application/x-zip",
+            // EPUB
+            "epub",
+            "application/epub+zip",
+            // Office Open XML
+            "docx",
+            "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
+            "xlsx",
+            "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
+            "pptx",
+            "application/vnd.openxmlformats-officedocument.presentationml.presentation",
+            "docm",
+            "application/vnd.ms-word.document.macroEnabled.12",
+            "xlsm",
+            "application/vnd.ms-excel.sheet.macroEnabled.12",
+            "pptm",
+            "application/vnd.ms-powerpoint.presentation.macroEnabled.12",
+            // Open Document
+            "odt",
+            "application/vnd.oasis.opendocument.text",
+            "ods",
+            "application/vnd.oasis.opendocument.spreadsheet",
+            "odp",
+            "application/vnd.oasis.opendocument.presentation",
+            "odg",
+            "application/vnd.oasis.opendocument.graphics",
+            "ott",
+            "application/vnd.oasis.opendocument.text-template",
+            "ots",
+            "application/vnd.oasis.opendocument.spreadsheet-template",
+            "otp",
+            "application/vnd.oasis.opendocument.presentation-template",
+            "otg",
+            "application/vnd.oasis.opendocument.graphics-template",
+            // OpenXPS
+            "oxps",
+            "application/oxps",
+        ]
+    }
+}
+
+impl ZipIO {
+    fn writer<'a>(
+        &self,
+        input_stream: &'a mut dyn CAIRead,
+        output_stream: &'a mut dyn CAIReadWrite,
+    ) -> ZipResult<ZipWriter<CAIReadWriteWrapper<'a>>> {
+        let mut writer = ZipWriter::new_append(CAIReadWriteWrapper {
+            reader_writer: output_stream,
+        })?;
+
+        writer.merge_archive(ZipArchive::new(CAIReadWrapper {
+            reader: input_stream,
+        })?)?;
+
+        Ok(writer)
+    }
+
+    fn reader<'a>(
+        &self,
+        input_stream: &'a mut dyn CAIRead,
+    ) -> ZipResult<ZipArchive<CAIReadWrapper<'a>>> {
+        ZipArchive::new(CAIReadWrapper {
+            reader: input_stream,
+        })
+    }
+}
diff --git a/sdk/src/builder.rs b/sdk/src/builder.rs
index 4e8005389..8eb4f1258 100644
--- a/sdk/src/builder.rs
+++ b/sdk/src/builder.rs
@@ -20,7 +20,7 @@ use async_generic::async_generic;
 use serde::{Deserialize, Serialize};
 use serde_with::skip_serializing_none;
 use uuid::Uuid;
-use zip::{write::FileOptions, ZipArchive, ZipWriter};
+use zip::{write::SimpleFileOptions, ZipArchive, ZipWriter};
 
 use crate::{
     assertion::AssertionBase,
@@ -370,7 +370,7 @@ impl Builder {
             {
                 let mut zip = ZipWriter::new(stream);
                 let options =
-                    FileOptions::default().compression_method(zip::CompressionMethod::Stored);
+                    SimpleFileOptions::default().compression_method(zip::CompressionMethod::Stored);
                 zip.start_file("manifest.json", options)
                     .map_err(|e| Error::OtherError(Box::new(e)))?;
                 zip.write_all(&serde_json::to_vec(self)?)?;
diff --git a/sdk/src/jumbf_io.rs b/sdk/src/jumbf_io.rs
index 6b23478e5..f96ac4d09 100644
--- a/sdk/src/jumbf_io.rs
+++ b/sdk/src/jumbf_io.rs
@@ -28,7 +28,7 @@ use crate::asset_handlers::pdf_io::PdfIO;
 use crate::{
     asset_handlers::{
         bmff_io::BmffIO, c2pa_io::C2paIO, jpeg_io::JpegIO, mp3_io::Mp3IO, png_io::PngIO,
-        riff_io::RiffIO, svg_io::SvgIO, tiff_io::TiffIO,
+        riff_io::RiffIO, svg_io::SvgIO, tiff_io::TiffIO, zip_io::ZipIO,
     },
     asset_io::{AssetIO, CAIRead, CAIReadWrite, CAIReader, CAIWriter, HashObjectPositions},
     error::{Error, Result},
@@ -48,6 +48,7 @@ lazy_static! {
             Box::new(SvgIO::new("")),
             Box::new(TiffIO::new("")),
             Box::new(Mp3IO::new("")),
+            Box::new(ZipIO::new("")),
         ];
 
         let mut handler_map = HashMap::new();
@@ -76,6 +77,7 @@ lazy_static! {
             Box::new(SvgIO::new("")),
             Box::new(TiffIO::new("")),
             Box::new(Mp3IO::new("")),
+            Box::new(ZipIO::new("")),
         ];
         let mut handler_map = HashMap::new();
 
@@ -364,6 +366,7 @@ pub mod tests {
             Box::new(TiffIO::new("")),
             Box::new(SvgIO::new("")),
             Box::new(Mp3IO::new("")),
+            Box::new(ZipIO::new("")),
         ];
 
         // build handler map
@@ -388,6 +391,7 @@ pub mod tests {
             Box::new(TiffIO::new("")),
             Box::new(SvgIO::new("")),
             Box::new(Mp3IO::new("")),
+            Box::new(ZipIO::new("")),
         ];
 
         // build handler map
@@ -405,6 +409,7 @@ pub mod tests {
             Box::new(JpegIO::new("")),
             Box::new(PngIO::new("")),
             Box::new(Mp3IO::new("")),
+            Box::new(ZipIO::new("")),
             Box::new(SvgIO::new("")),
             Box::new(RiffIO::new("")),
         ];

From a84411a2f5e93ab588fac92160f6688a8d547ce7 Mon Sep 17 00:00:00 2001
From: ok-nick <nick.libraries@gmail.com>
Date: Mon, 8 Jul 2024 15:19:29 -0400
Subject: [PATCH 02/21] Foundation for collection data hash assertion

---
 sdk/src/assertions/collection_hash.rs | 79 +++++++++++++++++++++++++++
 sdk/src/assertions/mod.rs             |  3 +
 sdk/src/asset_handlers/zip_io.rs      | 41 +++++++++++++-
 sdk/src/utils/hash_utils.rs           |  2 +
 4 files changed, 123 insertions(+), 2 deletions(-)
 create mode 100644 sdk/src/assertions/collection_hash.rs

diff --git a/sdk/src/assertions/collection_hash.rs b/sdk/src/assertions/collection_hash.rs
new file mode 100644
index 000000000..305f0d005
--- /dev/null
+++ b/sdk/src/assertions/collection_hash.rs
@@ -0,0 +1,79 @@
+use std::io::{Read, Seek};
+
+use serde::{Deserialize, Serialize};
+
+use crate::{assertions::AssetType, asset_handlers::zip_io, hash_stream_by_alg, Error, Result};
+
+#[derive(Serialize, Deserialize, Debug, PartialEq, Eq)]
+pub struct CollectionHash {
+    pub uri_maps: Vec<UriHashedDataMap>,
+
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub alg: Option<String>,
+
+    #[serde(skip_serializing_if = "Option::is_none", with = "serde_bytes")]
+    pub zip_central_directory_hash: Option<Vec<u8>>,
+}
+
+#[derive(Serialize, Deserialize, Debug, PartialEq, Eq)]
+pub struct UriHashedDataMap {
+    pub uri: String,
+
+    #[serde(with = "serde_bytes")]
+    pub hash: Vec<u8>,
+
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub size: Option<u64>,
+
+    #[serde(rename = "dc:format", skip_serializing_if = "Option::is_none")]
+    pub dc_format: Option<String>,
+
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub data_types: Option<Vec<AssetType>>,
+}
+
+impl CollectionHash {
+    pub fn new(alg: String) -> Self {
+        CollectionHash {
+            uri_maps: Vec::new(),
+            alg: Some(alg),
+            zip_central_directory_hash: None,
+        }
+    }
+
+    fn add_uri_map(&mut self, uri_map: UriHashedDataMap) {
+        self.uri_maps.push(uri_map);
+    }
+
+    // TODO: support custom collection hashes
+    pub fn gen_hash_from_stream<R>(&mut self, stream: &mut R) -> Result<()>
+    where
+        R: Read + Seek + ?Sized,
+    {
+        let alg = match self.alg {
+            Some(ref a) => a.clone(),
+            None => "sha256".to_string(),
+        };
+
+        let zip_central_directory_inclusions = zip_io::central_directory_inclusions(stream)?;
+        let zip_central_directory_hash =
+            hash_stream_by_alg(&alg, stream, Some(zip_central_directory_inclusions), false)?;
+        if zip_central_directory_hash.is_empty() {
+            return Err(Error::BadParam("could not generate data hash".to_string()));
+        }
+        self.zip_central_directory_hash = Some(zip_central_directory_hash);
+
+        let uri_inclusions = zip_io::uri_inclusions(stream, &self.uri_maps)?;
+        for (i, uri_map) in self.uri_maps.iter_mut().enumerate() {
+            let hash =
+                hash_stream_by_alg(&alg, stream, Some(vec![uri_inclusions[i].clone()]), false)?;
+            if hash.is_empty() {
+                return Err(Error::BadParam("could not generate data hash".to_string()));
+            }
+
+            uri_map.hash = hash;
+        }
+
+        Ok(())
+    }
+}
diff --git a/sdk/src/assertions/mod.rs b/sdk/src/assertions/mod.rs
index dae13620e..8c71b3993 100644
--- a/sdk/src/assertions/mod.rs
+++ b/sdk/src/assertions/mod.rs
@@ -25,6 +25,9 @@ pub use box_hash::{BoxHash, BoxMap, C2PA_BOXHASH};
 mod data_hash;
 pub use data_hash::DataHash;
 
+mod collection_hash;
+pub use collection_hash::{CollectionHash, UriHashedDataMap};
+
 mod creative_work;
 pub use creative_work::CreativeWork;
 
diff --git a/sdk/src/asset_handlers/zip_io.rs b/sdk/src/asset_handlers/zip_io.rs
index a411cdea6..18c59fed4 100644
--- a/sdk/src/asset_handlers/zip_io.rs
+++ b/sdk/src/asset_handlers/zip_io.rs
@@ -1,6 +1,6 @@
 use std::{
     fs::{self, File},
-    io::{self, Read},
+    io::{self, Read, Seek},
     path::Path,
 };
 
@@ -8,12 +8,13 @@ use tempfile::Builder;
 use zip::{result::ZipResult, write::SimpleFileOptions, CompressionMethod, ZipArchive, ZipWriter};
 
 use crate::{
+    assertions::UriHashedDataMap,
     asset_io::{
         self, AssetIO, CAIReadWrapper, CAIReadWriteWrapper, CAIReader, CAIWriter,
         HashObjectPositions,
     },
     error::Result,
-    CAIRead, CAIReadWrite, Error,
+    CAIRead, CAIReadWrite, Error, HashRange,
 };
 
 pub struct ZipIO {}
@@ -231,3 +232,39 @@ impl ZipIO {
         })
     }
 }
+
+pub fn central_directory_inclusions<R>(reader: &mut R) -> Result<Vec<HashRange>>
+where
+    R: Read + Seek + ?Sized,
+{
+    let _reader = ZipArchive::new(reader).map_err(|_| Error::JumbfNotFound)?;
+
+    // TODO: https://github.com/zip-rs/zip2/pull/71
+    //       or
+    //       https://gitlab.com/xMAC94x/zip-core (https://github.com/zip-rs/zip2/issues/204)
+
+    todo!()
+}
+
+pub fn uri_inclusions<R>(reader: &mut R, uri_maps: &[UriHashedDataMap]) -> Result<Vec<HashRange>>
+where
+    R: Read + Seek + ?Sized,
+{
+    let mut reader = ZipArchive::new(reader).map_err(|_| Error::JumbfNotFound)?;
+
+    let mut ranges = Vec::new();
+    for uri_map in uri_maps {
+        let index = reader
+            .index_for_path(Path::new(&uri_map.uri))
+            .ok_or(Error::JumbfNotFound)?;
+        let file = reader.by_index(index).map_err(|_| Error::JumbfNotFound)?;
+        // TODO: hash from header or data? does compressed_size include header?
+        //       and fix error type
+        ranges.push(HashRange::new(
+            usize::try_from(file.header_start()).map_err(|_| Error::JumbfNotFound)?,
+            usize::try_from(file.compressed_size()).map_err(|_| Error::JumbfNotFound)?,
+        ));
+    }
+
+    Ok(ranges)
+}
diff --git a/sdk/src/utils/hash_utils.rs b/sdk/src/utils/hash_utils.rs
index 241769204..056b9f4dd 100644
--- a/sdk/src/utils/hash_utils.rs
+++ b/sdk/src/utils/hash_utils.rs
@@ -206,6 +206,8 @@ pub fn hash_asset_by_alg_with_inclusions(
     to_be_hashed: [IIIIIXXXXXMIIIIIMXXXXXMXXXXIII...III]
 
     The data is again split into range sets breaking at the exclusion points and now also the markers.
+
+    // TODO: describe collection hash
 */
 pub fn hash_stream_by_alg<R>(
     alg: &str,

From c03d65393bc330e96b227aa5e2b9b840c04df11e Mon Sep 17 00:00:00 2001
From: ok-nick <nick.libraries@gmail.com>
Date: Tue, 9 Jul 2024 10:50:42 -0400
Subject: [PATCH 03/21] Collection assertion hash resolver

---
 sdk/src/assertions/collection_hash.rs | 60 +++++++++++++++++++--------
 sdk/src/assertions/mod.rs             |  2 +-
 sdk/src/asset_handlers/zip_io.rs      | 53 ++++++++++++++---------
 3 files changed, 78 insertions(+), 37 deletions(-)

diff --git a/sdk/src/assertions/collection_hash.rs b/sdk/src/assertions/collection_hash.rs
index 305f0d005..7c4695f75 100644
--- a/sdk/src/assertions/collection_hash.rs
+++ b/sdk/src/assertions/collection_hash.rs
@@ -2,7 +2,11 @@ use std::io::{Read, Seek};
 
 use serde::{Deserialize, Serialize};
 
-use crate::{assertions::AssetType, asset_handlers::zip_io, hash_stream_by_alg, Error, Result};
+use crate::{
+    assertions::AssetType,
+    asset_handlers::zip_io::{self, ZipHashResolver},
+    hash_stream_by_alg, CAIRead, Error, HashRange, Result,
+};
 
 #[derive(Serialize, Deserialize, Debug, PartialEq, Eq)]
 pub struct CollectionHash {
@@ -41,19 +45,34 @@ impl CollectionHash {
         }
     }
 
-    fn add_uri_map(&mut self, uri_map: UriHashedDataMap) {
+    pub fn add_uri_map(&mut self, uri_map: UriHashedDataMap) {
         self.uri_maps.push(uri_map);
     }
 
-    // TODO: support custom collection hashes
-    pub fn gen_hash_from_stream<R>(&mut self, stream: &mut R) -> Result<()>
+    pub fn gen_hash_from_stream<R, T>(&mut self, stream: &mut R, mut resolver: T) -> Result<()>
     where
         R: Read + Seek + ?Sized,
+        T: UriHashResolver,
     {
-        let alg = match self.alg {
-            Some(ref a) => a.clone(),
-            None => "sha256".to_string(),
-        };
+        let alg = self.alg();
+        for uri_map in &mut self.uri_maps {
+            let inclusions = resolver.resolve(uri_map);
+            let hash = hash_stream_by_alg(&alg, stream, Some(inclusions), false)?;
+            if hash.is_empty() {
+                return Err(Error::BadParam("could not generate data hash".to_string()));
+            }
+
+            uri_map.hash = hash;
+        }
+
+        Ok(())
+    }
+
+    pub fn gen_hash_from_zip_stream<R>(&mut self, stream: &mut R) -> Result<()>
+    where
+        R: Read + Seek + ?Sized,
+    {
+        let alg = self.alg();
 
         let zip_central_directory_inclusions = zip_io::central_directory_inclusions(stream)?;
         let zip_central_directory_hash =
@@ -63,17 +82,24 @@ impl CollectionHash {
         }
         self.zip_central_directory_hash = Some(zip_central_directory_hash);
 
-        let uri_inclusions = zip_io::uri_inclusions(stream, &self.uri_maps)?;
-        for (i, uri_map) in self.uri_maps.iter_mut().enumerate() {
-            let hash =
-                hash_stream_by_alg(&alg, stream, Some(vec![uri_inclusions[i].clone()]), false)?;
-            if hash.is_empty() {
-                return Err(Error::BadParam("could not generate data hash".to_string()));
-            }
+        let resolver = ZipHashResolver::new(stream, &self.uri_maps)?;
+        self.gen_hash_from_stream(stream, resolver)?;
 
-            uri_map.hash = hash;
-        }
+        Ok(())
+    }
 
+    pub fn verify_stream_hash(&self, reader: &mut dyn CAIRead, alg: Option<&str>) -> Result<()> {
         Ok(())
     }
+
+    fn alg(&self) -> String {
+        match self.alg {
+            Some(ref a) => a.clone(),
+            None => "sha256".to_string(),
+        }
+    }
+}
+
+pub trait UriHashResolver {
+    fn resolve(&mut self, uri_map: &UriHashedDataMap) -> Vec<HashRange>;
 }
diff --git a/sdk/src/assertions/mod.rs b/sdk/src/assertions/mod.rs
index 8c71b3993..067c893c1 100644
--- a/sdk/src/assertions/mod.rs
+++ b/sdk/src/assertions/mod.rs
@@ -26,7 +26,7 @@ mod data_hash;
 pub use data_hash::DataHash;
 
 mod collection_hash;
-pub use collection_hash::{CollectionHash, UriHashedDataMap};
+pub use collection_hash::{CollectionHash, UriHashResolver, UriHashedDataMap};
 
 mod creative_work;
 pub use creative_work::CreativeWork;
diff --git a/sdk/src/asset_handlers/zip_io.rs b/sdk/src/asset_handlers/zip_io.rs
index 18c59fed4..b81cb03b8 100644
--- a/sdk/src/asset_handlers/zip_io.rs
+++ b/sdk/src/asset_handlers/zip_io.rs
@@ -8,7 +8,7 @@ use tempfile::Builder;
 use zip::{result::ZipResult, write::SimpleFileOptions, CompressionMethod, ZipArchive, ZipWriter};
 
 use crate::{
-    assertions::UriHashedDataMap,
+    assertions::{UriHashResolver, UriHashedDataMap},
     asset_io::{
         self, AssetIO, CAIReadWrapper, CAIReadWriteWrapper, CAIReader, CAIWriter,
         HashObjectPositions,
@@ -246,25 +246,40 @@ where
     todo!()
 }
 
-pub fn uri_inclusions<R>(reader: &mut R, uri_maps: &[UriHashedDataMap]) -> Result<Vec<HashRange>>
-where
-    R: Read + Seek + ?Sized,
-{
-    let mut reader = ZipArchive::new(reader).map_err(|_| Error::JumbfNotFound)?;
+pub struct ZipHashResolver {
+    ranges: Vec<HashRange>,
+    i: usize,
+}
 
-    let mut ranges = Vec::new();
-    for uri_map in uri_maps {
-        let index = reader
-            .index_for_path(Path::new(&uri_map.uri))
-            .ok_or(Error::JumbfNotFound)?;
-        let file = reader.by_index(index).map_err(|_| Error::JumbfNotFound)?;
-        // TODO: hash from header or data? does compressed_size include header?
-        //       and fix error type
-        ranges.push(HashRange::new(
-            usize::try_from(file.header_start()).map_err(|_| Error::JumbfNotFound)?,
-            usize::try_from(file.compressed_size()).map_err(|_| Error::JumbfNotFound)?,
-        ));
+impl ZipHashResolver {
+    pub fn new<R: Read + Seek + ?Sized>(
+        stream: &mut R,
+        uri_maps: &[UriHashedDataMap],
+    ) -> Result<Self> {
+        let mut reader = ZipArchive::new(stream).map_err(|_| Error::JumbfNotFound)?;
+
+        let mut ranges = Vec::new();
+        for uri_map in uri_maps {
+            let index = reader
+                .index_for_path(Path::new(&uri_map.uri))
+                .ok_or(Error::JumbfNotFound)?;
+            let file = reader.by_index(index).map_err(|_| Error::JumbfNotFound)?;
+            // TODO: hash from header or data? does compressed_size include header?
+            //       and fix error type
+            ranges.push(HashRange::new(
+                usize::try_from(file.header_start()).map_err(|_| Error::JumbfNotFound)?,
+                usize::try_from(file.compressed_size()).map_err(|_| Error::JumbfNotFound)?,
+            ));
+        }
+
+        Ok(Self { ranges, i: 0 })
     }
+}
 
-    Ok(ranges)
+impl UriHashResolver for ZipHashResolver {
+    fn resolve(&mut self, _uri_map: &UriHashedDataMap) -> Vec<HashRange> {
+        let range = self.ranges[self.i].clone();
+        self.i += 1;
+        vec![range]
+    }
 }

From 9e4af21033cf5121ecf6dca849bf4777b00315de Mon Sep 17 00:00:00 2001
From: ok-nick <nick.libraries@gmail.com>
Date: Tue, 9 Jul 2024 14:36:53 -0400
Subject: [PATCH 04/21] Rework collection hash assertion

---
 sdk/src/assertions/collection_hash.rs | 162 +++++++++++++++++++++-----
 sdk/src/assertions/mod.rs             |   2 +-
 sdk/src/asset_handlers/zip_io.rs      |  64 +++++-----
 3 files changed, 164 insertions(+), 64 deletions(-)

diff --git a/sdk/src/assertions/collection_hash.rs b/sdk/src/assertions/collection_hash.rs
index 7c4695f75..f183d6927 100644
--- a/sdk/src/assertions/collection_hash.rs
+++ b/sdk/src/assertions/collection_hash.rs
@@ -1,16 +1,19 @@
-use std::io::{Read, Seek};
+use std::{
+    fs::File,
+    io::{Read, Seek},
+    path::{Path, PathBuf},
+};
 
 use serde::{Deserialize, Serialize};
 
 use crate::{
-    assertions::AssetType,
-    asset_handlers::zip_io::{self, ZipHashResolver},
-    hash_stream_by_alg, CAIRead, Error, HashRange, Result,
+    assertions::AssetType, asset_handlers::zip_io, hash_stream_by_alg,
+    hash_utils::verify_stream_by_alg, Error, HashRange, Result,
 };
 
 #[derive(Serialize, Deserialize, Debug, PartialEq, Eq)]
 pub struct CollectionHash {
-    pub uri_maps: Vec<UriHashedDataMap>,
+    pub uris: Vec<UriHashedDataMap>,
 
     #[serde(skip_serializing_if = "Option::is_none")]
     pub alg: Option<String>,
@@ -21,7 +24,7 @@ pub struct CollectionHash {
 
 #[derive(Serialize, Deserialize, Debug, PartialEq, Eq)]
 pub struct UriHashedDataMap {
-    pub uri: String,
+    pub uri: PathBuf,
 
     #[serde(with = "serde_bytes")]
     pub hash: Vec<u8>,
@@ -34,45 +37,63 @@ pub struct UriHashedDataMap {
 
     #[serde(skip_serializing_if = "Option::is_none")]
     pub data_types: Option<Vec<AssetType>>,
+
+    // When parsing zips we can cache the hash ranges as well in one shot.
+    #[serde(skip)]
+    pub(crate) zip_inclusion: Option<HashRange>,
 }
 
 impl CollectionHash {
     pub fn new(alg: String) -> Self {
         CollectionHash {
-            uri_maps: Vec::new(),
+            uris: Vec::new(),
             alg: Some(alg),
             zip_central_directory_hash: None,
         }
     }
 
     pub fn add_uri_map(&mut self, uri_map: UriHashedDataMap) {
-        self.uri_maps.push(uri_map);
+        self.uris.push(uri_map);
     }
 
-    pub fn gen_hash_from_stream<R, T>(&mut self, stream: &mut R, mut resolver: T) -> Result<()>
+    // TODO: is it safe to assume self.uris includes the stream that's being embedded into? or should
+    //       we pass it as a param?
+    pub fn gen_hash<R>(&mut self, base_path: &Path) -> Result<()>
     where
         R: Read + Seek + ?Sized,
-        T: UriHashResolver,
     {
-        let alg = self.alg();
-        for uri_map in &mut self.uri_maps {
-            let inclusions = resolver.resolve(uri_map);
-            let hash = hash_stream_by_alg(&alg, stream, Some(inclusions), false)?;
-            if hash.is_empty() {
-                return Err(Error::BadParam("could not generate data hash".to_string()));
-            }
-
-            uri_map.hash = hash;
+        let alg = self.alg().to_owned();
+        for uri_map in &mut self.uris {
+            let path = base_path.join(&uri_map.uri);
+            let mut file = File::open(path)?;
+            let file_len = file.metadata()?.len();
+
+            uri_map.hash = hash_stream_by_alg(
+                &alg,
+                &mut file,
+                // TODO: temp unwrap
+                #[allow(clippy::unwrap_used)]
+                Some(vec![HashRange::new(0, usize::try_from(file_len).unwrap())]),
+                false,
+            )?;
         }
 
         Ok(())
     }
 
+    pub fn gen_uris_from_zip_stream<R>(&mut self, stream: &mut R) -> Result<()>
+    where
+        R: Read + Seek + ?Sized,
+    {
+        self.uris = zip_io::uri_inclusions(stream)?;
+        Ok(())
+    }
+
     pub fn gen_hash_from_zip_stream<R>(&mut self, stream: &mut R) -> Result<()>
     where
         R: Read + Seek + ?Sized,
     {
-        let alg = self.alg();
+        let alg = self.alg().to_owned();
 
         let zip_central_directory_inclusions = zip_io::central_directory_inclusions(stream)?;
         let zip_central_directory_hash =
@@ -82,24 +103,105 @@ impl CollectionHash {
         }
         self.zip_central_directory_hash = Some(zip_central_directory_hash);
 
-        let resolver = ZipHashResolver::new(stream, &self.uri_maps)?;
-        self.gen_hash_from_stream(stream, resolver)?;
+        for uri_map in self.uris.iter_mut() {
+            match &uri_map.zip_inclusion {
+                Some(inclusion) => {
+                    let hash =
+                        hash_stream_by_alg(&alg, stream, Some(vec![inclusion.clone()]), false)?;
+                    if hash.is_empty() {
+                        return Err(Error::BadParam("could not generate data hash".to_string()));
+                    }
+
+                    uri_map.hash = hash;
+                }
+                None => {
+                    return Err(Error::BadParam(
+                        "must generate zip stream uris before generating hashes".to_owned(),
+                    ))
+                }
+            }
+        }
 
         Ok(())
     }
 
-    pub fn verify_stream_hash(&self, reader: &mut dyn CAIRead, alg: Option<&str>) -> Result<()> {
+    pub fn verify_stream_hash<R>(&self, alg: Option<&str>, base_path: &Path) -> Result<()>
+    where
+        R: Read + Seek + ?Sized,
+    {
+        let alg = alg.unwrap_or_else(|| self.alg());
+        for uri_map in &self.uris {
+            let path = base_path.join(&uri_map.uri);
+            let mut file = File::open(&path)?;
+            let file_len = file.metadata()?.len();
+
+            if !verify_stream_by_alg(
+                alg,
+                &uri_map.hash,
+                &mut file,
+                // TODO: temp unwrap
+                #[allow(clippy::unwrap_used)]
+                Some(vec![HashRange::new(0, usize::try_from(file_len).unwrap())]),
+                false,
+            ) {
+                return Err(Error::HashMismatch(format!(
+                    "hash for {} does not match",
+                    path.display()
+                )));
+            }
+        }
+
         Ok(())
     }
 
-    fn alg(&self) -> String {
-        match self.alg {
-            Some(ref a) => a.clone(),
-            None => "sha256".to_string(),
+    pub fn verify_zip_stream_hash<R>(&self, stream: &mut R, alg: Option<&str>) -> Result<()>
+    where
+        R: Read + Seek + ?Sized,
+    {
+        let alg = alg.unwrap_or_else(|| self.alg());
+        let central_directory_hash = match &self.zip_central_directory_hash {
+            Some(hash) => Ok(hash),
+            None => Err(Error::BadParam(
+                "Missing zip central directory hash".to_owned(),
+            )),
+        }?;
+        let zip_central_directory_inclusions = zip_io::central_directory_inclusions(stream)?;
+        if !verify_stream_by_alg(
+            alg,
+            central_directory_hash,
+            stream,
+            Some(zip_central_directory_inclusions),
+            false,
+        ) {
+            return Err(Error::HashMismatch(
+                "Hashes do not match for zip central directory".to_owned(),
+            ));
+        }
+
+        // TODO: we don't need to generate new uri maps, only ranges, and we only need the ranges for the
+        //       files that exist in the uri_map, or should we always do all of them?
+        let uris = zip_io::uri_inclusions(stream)?;
+        for (uri_map, uri_map_inclusion) in self.uris.iter().zip(uris) {
+            if !verify_stream_by_alg(
+                alg,
+                &uri_map.hash,
+                stream,
+                // Safe to unwrap because zip_io::uri_inclusions guarantees this field to be valid.
+                #[allow(clippy::unwrap_used)]
+                Some(vec![uri_map_inclusion.zip_inclusion.unwrap()]),
+                false,
+            ) {
+                return Err(Error::HashMismatch(format!(
+                    "hash for {} does not match",
+                    uri_map.uri.display()
+                )));
+            }
         }
+
+        Ok(())
     }
-}
 
-pub trait UriHashResolver {
-    fn resolve(&mut self, uri_map: &UriHashedDataMap) -> Vec<HashRange>;
+    fn alg(&self) -> &str {
+        self.alg.as_deref().unwrap_or("sha256")
+    }
 }
diff --git a/sdk/src/assertions/mod.rs b/sdk/src/assertions/mod.rs
index 067c893c1..8c71b3993 100644
--- a/sdk/src/assertions/mod.rs
+++ b/sdk/src/assertions/mod.rs
@@ -26,7 +26,7 @@ mod data_hash;
 pub use data_hash::DataHash;
 
 mod collection_hash;
-pub use collection_hash::{CollectionHash, UriHashResolver, UriHashedDataMap};
+pub use collection_hash::{CollectionHash, UriHashedDataMap};
 
 mod creative_work;
 pub use creative_work::CreativeWork;
diff --git a/sdk/src/asset_handlers/zip_io.rs b/sdk/src/asset_handlers/zip_io.rs
index b81cb03b8..34eeeefae 100644
--- a/sdk/src/asset_handlers/zip_io.rs
+++ b/sdk/src/asset_handlers/zip_io.rs
@@ -8,7 +8,7 @@ use tempfile::Builder;
 use zip::{result::ZipResult, write::SimpleFileOptions, CompressionMethod, ZipArchive, ZipWriter};
 
 use crate::{
-    assertions::{UriHashResolver, UriHashedDataMap},
+    assertions::UriHashedDataMap,
     asset_io::{
         self, AssetIO, CAIReadWrapper, CAIReadWriteWrapper, CAIReader, CAIWriter,
         HashObjectPositions,
@@ -246,40 +246,38 @@ where
     todo!()
 }
 
-pub struct ZipHashResolver {
-    ranges: Vec<HashRange>,
-    i: usize,
-}
+pub fn uri_inclusions<R>(stream: &mut R) -> Result<Vec<UriHashedDataMap>>
+where
+    R: Read + Seek + ?Sized,
+{
+    let mut reader = ZipArchive::new(stream).map_err(|_| Error::JumbfNotFound)?;
 
-impl ZipHashResolver {
-    pub fn new<R: Read + Seek + ?Sized>(
-        stream: &mut R,
-        uri_maps: &[UriHashedDataMap],
-    ) -> Result<Self> {
-        let mut reader = ZipArchive::new(stream).map_err(|_| Error::JumbfNotFound)?;
-
-        let mut ranges = Vec::new();
-        for uri_map in uri_maps {
-            let index = reader
-                .index_for_path(Path::new(&uri_map.uri))
-                .ok_or(Error::JumbfNotFound)?;
-            let file = reader.by_index(index).map_err(|_| Error::JumbfNotFound)?;
-            // TODO: hash from header or data? does compressed_size include header?
-            //       and fix error type
-            ranges.push(HashRange::new(
-                usize::try_from(file.header_start()).map_err(|_| Error::JumbfNotFound)?,
-                usize::try_from(file.compressed_size()).map_err(|_| Error::JumbfNotFound)?,
-            ));
-        }
+    let mut ranges = Vec::new();
+    let file_names: Vec<String> = reader.file_names().map(|n| n.to_owned()).collect();
+    for file_name in file_names {
+        let file = reader
+            .by_name(&file_name)
+            .map_err(|_| Error::JumbfNotFound)?;
 
-        Ok(Self { ranges, i: 0 })
+        if !file.is_dir() {
+            ranges.push(UriHashedDataMap {
+                // TODO: temp unwrap
+                #[allow(clippy::unwrap_used)]
+                uri: file.enclosed_name().unwrap(),
+                hash: Vec::new(),
+                // TODO: same here
+                size: Some(file.header_start() - file.compressed_size()),
+                dc_format: None,  // TODO
+                data_types: None, // TODO
+                // TODO: hash from header or data? does compressed_size include header?
+                //       and fix error type
+                zip_inclusion: Some(HashRange::new(
+                    usize::try_from(file.header_start()).map_err(|_| Error::JumbfNotFound)?,
+                    usize::try_from(file.compressed_size()).map_err(|_| Error::JumbfNotFound)?,
+                )),
+            });
+        }
     }
-}
 
-impl UriHashResolver for ZipHashResolver {
-    fn resolve(&mut self, _uri_map: &UriHashedDataMap) -> Vec<HashRange> {
-        let range = self.ranges[self.i].clone();
-        self.i += 1;
-        vec![range]
-    }
+    Ok(ranges)
 }

From 6559e6fe6c093b6fe85ef86eb6aa18b93bd65f4b Mon Sep 17 00:00:00 2001
From: ok-nick <nick.libraries@gmail.com>
Date: Tue, 9 Jul 2024 15:06:50 -0400
Subject: [PATCH 05/21] More collection assertion validation

---
 sdk/src/assertions/collection_hash.rs | 47 ++++++++-------------------
 sdk/src/asset_handlers/zip_io.rs      | 39 +++++++++++++++++-----
 2 files changed, 43 insertions(+), 43 deletions(-)

diff --git a/sdk/src/assertions/collection_hash.rs b/sdk/src/assertions/collection_hash.rs
index f183d6927..490c8ffab 100644
--- a/sdk/src/assertions/collection_hash.rs
+++ b/sdk/src/assertions/collection_hash.rs
@@ -37,10 +37,6 @@ pub struct UriHashedDataMap {
 
     #[serde(skip_serializing_if = "Option::is_none")]
     pub data_types: Option<Vec<AssetType>>,
-
-    // When parsing zips we can cache the hash ranges as well in one shot.
-    #[serde(skip)]
-    pub(crate) zip_inclusion: Option<HashRange>,
 }
 
 impl CollectionHash {
@@ -81,11 +77,13 @@ impl CollectionHash {
         Ok(())
     }
 
+    // We overwrite all URIs with all existing URIs in the ZIP because all URIs in the ZIP represent all
+    // possible valid URIs — we don't want duplicates!
     pub fn gen_uris_from_zip_stream<R>(&mut self, stream: &mut R) -> Result<()>
     where
         R: Read + Seek + ?Sized,
     {
-        self.uris = zip_io::uri_inclusions(stream)?;
+        self.uris = zip_io::uri_maps(stream)?;
         Ok(())
     }
 
@@ -103,23 +101,14 @@ impl CollectionHash {
         }
         self.zip_central_directory_hash = Some(zip_central_directory_hash);
 
-        for uri_map in self.uris.iter_mut() {
-            match &uri_map.zip_inclusion {
-                Some(inclusion) => {
-                    let hash =
-                        hash_stream_by_alg(&alg, stream, Some(vec![inclusion.clone()]), false)?;
-                    if hash.is_empty() {
-                        return Err(Error::BadParam("could not generate data hash".to_string()));
-                    }
-
-                    uri_map.hash = hash;
-                }
-                None => {
-                    return Err(Error::BadParam(
-                        "must generate zip stream uris before generating hashes".to_owned(),
-                    ))
-                }
+        let hash_ranges = zip_io::uri_inclusions(stream, &self.uris)?;
+        for (uri_map, hash_range) in self.uris.iter_mut().zip(hash_ranges) {
+            let hash = hash_stream_by_alg(&alg, stream, Some(vec![hash_range]), false)?;
+            if hash.is_empty() {
+                return Err(Error::BadParam("could not generate data hash".to_string()));
             }
+
+            uri_map.hash = hash;
         }
 
         Ok(())
@@ -178,19 +167,9 @@ impl CollectionHash {
             ));
         }
 
-        // TODO: we don't need to generate new uri maps, only ranges, and we only need the ranges for the
-        //       files that exist in the uri_map, or should we always do all of them?
-        let uris = zip_io::uri_inclusions(stream)?;
-        for (uri_map, uri_map_inclusion) in self.uris.iter().zip(uris) {
-            if !verify_stream_by_alg(
-                alg,
-                &uri_map.hash,
-                stream,
-                // Safe to unwrap because zip_io::uri_inclusions guarantees this field to be valid.
-                #[allow(clippy::unwrap_used)]
-                Some(vec![uri_map_inclusion.zip_inclusion.unwrap()]),
-                false,
-            ) {
+        let hash_ranges = zip_io::uri_inclusions(stream, &self.uris)?;
+        for (uri_map, hash_range) in self.uris.iter().zip(hash_ranges) {
+            if !verify_stream_by_alg(alg, &uri_map.hash, stream, Some(vec![hash_range]), false) {
                 return Err(Error::HashMismatch(format!(
                     "hash for {} does not match",
                     uri_map.uri.display()
diff --git a/sdk/src/asset_handlers/zip_io.rs b/sdk/src/asset_handlers/zip_io.rs
index 34eeeefae..3f0c10c37 100644
--- a/sdk/src/asset_handlers/zip_io.rs
+++ b/sdk/src/asset_handlers/zip_io.rs
@@ -233,6 +233,7 @@ impl ZipIO {
     }
 }
 
+// TODO: probably doesn't need to return a vec
 pub fn central_directory_inclusions<R>(reader: &mut R) -> Result<Vec<HashRange>>
 where
     R: Read + Seek + ?Sized,
@@ -246,13 +247,13 @@ where
     todo!()
 }
 
-pub fn uri_inclusions<R>(stream: &mut R) -> Result<Vec<UriHashedDataMap>>
+pub fn uri_maps<R>(stream: &mut R) -> Result<Vec<UriHashedDataMap>>
 where
     R: Read + Seek + ?Sized,
 {
     let mut reader = ZipArchive::new(stream).map_err(|_| Error::JumbfNotFound)?;
 
-    let mut ranges = Vec::new();
+    let mut uri_maps = Vec::new();
     let file_names: Vec<String> = reader.file_names().map(|n| n.to_owned()).collect();
     for file_name in file_names {
         let file = reader
@@ -260,7 +261,7 @@ where
             .map_err(|_| Error::JumbfNotFound)?;
 
         if !file.is_dir() {
-            ranges.push(UriHashedDataMap {
+            uri_maps.push(UriHashedDataMap {
                 // TODO: temp unwrap
                 #[allow(clippy::unwrap_used)]
                 uri: file.enclosed_name().unwrap(),
@@ -269,15 +270,35 @@ where
                 size: Some(file.header_start() - file.compressed_size()),
                 dc_format: None,  // TODO
                 data_types: None, // TODO
-                // TODO: hash from header or data? does compressed_size include header?
-                //       and fix error type
-                zip_inclusion: Some(HashRange::new(
-                    usize::try_from(file.header_start()).map_err(|_| Error::JumbfNotFound)?,
-                    usize::try_from(file.compressed_size()).map_err(|_| Error::JumbfNotFound)?,
-                )),
             });
         }
     }
 
+    Ok(uri_maps)
+}
+
+pub fn uri_inclusions<R>(stream: &mut R, uri_maps: &[UriHashedDataMap]) -> Result<Vec<HashRange>>
+where
+    R: Read + Seek + ?Sized,
+{
+    let mut reader = ZipArchive::new(stream).map_err(|_| Error::JumbfNotFound)?;
+
+    let mut ranges = Vec::new();
+    for uri_map in uri_maps {
+        let index = reader
+            .index_for_path(&uri_map.uri)
+            .ok_or(Error::JumbfNotFound)?;
+        let file = reader.by_index(index).map_err(|_| Error::JumbfNotFound)?;
+
+        if !file.is_dir() {
+            // TODO: hash from header or data? does compressed_size include header?
+            //       and fix error type
+            ranges.push(HashRange::new(
+                usize::try_from(file.header_start()).map_err(|_| Error::JumbfNotFound)?,
+                usize::try_from(file.compressed_size()).map_err(|_| Error::JumbfNotFound)?,
+            ));
+        }
+    }
+
     Ok(ranges)
 }

From ac1a12d1c1373030c591e1502fb11ef7a51b2550 Mon Sep 17 00:00:00 2001
From: ok-nick <nick.libraries@gmail.com>
Date: Tue, 9 Jul 2024 15:25:21 -0400
Subject: [PATCH 06/21] Fix ZIP unit tests

---
 sdk/src/assertions/collection_hash.rs | 58 +++++++++++++--------------
 sdk/src/asset_handlers/zip_io.rs      | 24 +++++------
 2 files changed, 41 insertions(+), 41 deletions(-)

diff --git a/sdk/src/assertions/collection_hash.rs b/sdk/src/assertions/collection_hash.rs
index 490c8ffab..87a315d5c 100644
--- a/sdk/src/assertions/collection_hash.rs
+++ b/sdk/src/assertions/collection_hash.rs
@@ -77,6 +77,35 @@ impl CollectionHash {
         Ok(())
     }
 
+    pub fn verify_hash<R>(&self, alg: Option<&str>, base_path: &Path) -> Result<()>
+    where
+        R: Read + Seek + ?Sized,
+    {
+        let alg = alg.unwrap_or_else(|| self.alg());
+        for uri_map in &self.uris {
+            let path = base_path.join(&uri_map.uri);
+            let mut file = File::open(&path)?;
+            let file_len = file.metadata()?.len();
+
+            if !verify_stream_by_alg(
+                alg,
+                &uri_map.hash,
+                &mut file,
+                // TODO: temp unwrap
+                #[allow(clippy::unwrap_used)]
+                Some(vec![HashRange::new(0, usize::try_from(file_len).unwrap())]),
+                false,
+            ) {
+                return Err(Error::HashMismatch(format!(
+                    "hash for {} does not match",
+                    path.display()
+                )));
+            }
+        }
+
+        Ok(())
+    }
+
     // We overwrite all URIs with all existing URIs in the ZIP because all URIs in the ZIP represent all
     // possible valid URIs — we don't want duplicates!
     pub fn gen_uris_from_zip_stream<R>(&mut self, stream: &mut R) -> Result<()>
@@ -114,35 +143,6 @@ impl CollectionHash {
         Ok(())
     }
 
-    pub fn verify_stream_hash<R>(&self, alg: Option<&str>, base_path: &Path) -> Result<()>
-    where
-        R: Read + Seek + ?Sized,
-    {
-        let alg = alg.unwrap_or_else(|| self.alg());
-        for uri_map in &self.uris {
-            let path = base_path.join(&uri_map.uri);
-            let mut file = File::open(&path)?;
-            let file_len = file.metadata()?.len();
-
-            if !verify_stream_by_alg(
-                alg,
-                &uri_map.hash,
-                &mut file,
-                // TODO: temp unwrap
-                #[allow(clippy::unwrap_used)]
-                Some(vec![HashRange::new(0, usize::try_from(file_len).unwrap())]),
-                false,
-            ) {
-                return Err(Error::HashMismatch(format!(
-                    "hash for {} does not match",
-                    path.display()
-                )));
-            }
-        }
-
-        Ok(())
-    }
-
     pub fn verify_zip_stream_hash<R>(&self, stream: &mut R, alg: Option<&str>) -> Result<()>
     where
         R: Read + Seek + ?Sized,
diff --git a/sdk/src/asset_handlers/zip_io.rs b/sdk/src/asset_handlers/zip_io.rs
index 3f0c10c37..cff59f566 100644
--- a/sdk/src/asset_handlers/zip_io.rs
+++ b/sdk/src/asset_handlers/zip_io.rs
@@ -177,11 +177,11 @@ impl AssetIO for ZipIO {
             "pptx",
             "application/vnd.openxmlformats-officedocument.presentationml.presentation",
             "docm",
-            "application/vnd.ms-word.document.macroEnabled.12",
+            "application/vnd.ms-word.document.macroenabled.12",
             "xlsm",
-            "application/vnd.ms-excel.sheet.macroEnabled.12",
+            "application/vnd.ms-excel.sheet.macroenabled.12",
             "pptm",
-            "application/vnd.ms-powerpoint.presentation.macroEnabled.12",
+            "application/vnd.ms-powerpoint.presentation.macroenabled.12",
             // Open Document
             "odt",
             "application/vnd.oasis.opendocument.text",
@@ -207,6 +207,15 @@ impl AssetIO for ZipIO {
 }
 
 impl ZipIO {
+    fn reader<'a>(
+        &self,
+        input_stream: &'a mut dyn CAIRead,
+    ) -> ZipResult<ZipArchive<CAIReadWrapper<'a>>> {
+        ZipArchive::new(CAIReadWrapper {
+            reader: input_stream,
+        })
+    }
+
     fn writer<'a>(
         &self,
         input_stream: &'a mut dyn CAIRead,
@@ -222,15 +231,6 @@ impl ZipIO {
 
         Ok(writer)
     }
-
-    fn reader<'a>(
-        &self,
-        input_stream: &'a mut dyn CAIRead,
-    ) -> ZipResult<ZipArchive<CAIReadWrapper<'a>>> {
-        ZipArchive::new(CAIReadWrapper {
-            reader: input_stream,
-        })
-    }
 }
 
 // TODO: probably doesn't need to return a vec

From d1f3eab973492ec12e16d5bbf882ca68808d8711 Mon Sep 17 00:00:00 2001
From: ok-nick <nick.libraries@gmail.com>
Date: Tue, 9 Jul 2024 15:30:39 -0400
Subject: [PATCH 07/21] Update memchr dep

---
 sdk/Cargo.toml                 |   2 +-
 sdk/tests/fixtures/sample1.zip | Bin 0 -> 2934 bytes
 2 files changed, 1 insertion(+), 1 deletion(-)
 create mode 100644 sdk/tests/fixtures/sample1.zip

diff --git a/sdk/Cargo.toml b/sdk/Cargo.toml
index aed7018c3..16f6a5a9d 100644
--- a/sdk/Cargo.toml
+++ b/sdk/Cargo.toml
@@ -96,7 +96,7 @@ jfifdump = "0.5.1"
 log = "0.4.8"
 lopdf = { version = "0.31.0", optional = true }
 lazy_static = "1.4.0"
-memchr = "2.7.1"
+memchr = "2.7.4"
 multibase = "0.9.0"
 multihash = "0.11.4"
 mp4 = "0.13.0"
diff --git a/sdk/tests/fixtures/sample1.zip b/sdk/tests/fixtures/sample1.zip
new file mode 100644
index 0000000000000000000000000000000000000000..29dec9efab522515c1df0df5b65f4b12f2b2a852
GIT binary patch
literal 2934
zcmWIWW@Zs#0D+e3ml0qFlwe~}U?@o~E-};(4dG>A_w4UY1K~&@F0J5ZU}X8u$iM*B
z8vr(ggMovg2+0f<pgH_-Gr%V4l~j~~4buP`;s?Ya8qF|mPhCAmh5&DN4xYOD8*D&B
zKnCHkf{B4a3TQ=qysx9Pe{h68#Jzg)*lkP3v@JP7G&wyXDdB^!PuK_ku#SKRrU??v
z>H^Y+1_p6VmmKVbxtRkG7^<jn9AbKz5W;9Aw?MP8^Rux*hx+s9uT;fEo-BFQx+HAM
zj%TXRm!~{i+C0@zikXv<jZL#<)|?}cCS7`TWy+KzlR}<0By<$^oO3wDoFU^Bo0n9?
z=dw=PBgDvHhVsd!(^Qzgsz;^%-t|L8ZRxWk;UP=XRi=kbTee(m*3zXjmducNDxyA3
zZ1(f1Os3PPEqNIgAuTmqLULAg)Ql5fzb|X#2|MQ-$bS2R#`9aZ9+h9HPdZ<!{dE1_
z-SdRReO^2W);TV`*)vR>f6kq66?b2LJG++Mx9+-`yS&bcUzN4tk4qP?)nAvliDPs8
zmgA2CYn<n~eZR%$xln(7w)v@Rb}R1}dfR0s{=bu3C3&xHvj3ZBALoh-?kG*V@`gKj
z*@oQnljhi;uHy~=dg|Eh`tbE99@R~l@;%{D`xn*jX*;7d<>Jg`!Et$T%}dS{P<jEz
z7$`2efQf~HAp<G70J&(%#R#5UM1T=z2E-s5ExCXa3P>vqAQB34dgcd8qoze;xZ$8g
z4N8k38qIJ}S`5@^6b5^N`Ku#4C)gb1<cMqw$a6T8Bc_GJFF8hh10~0MI`ZjU+ZG0%
z+q(7W`vw1x%zGVnD!#hhR#k7tg@eoM&Av5Go5sX1vhT*8y(^}cA5Wj_acABfU+cTk
zN!lsbZ*GveynkcU%hQjUUJBjxo&E3Mlf64%YJaZ$x9WoC`J~GGM_kxCbhWqc`?|nj
zf41f9u=S5#wSRS&6K}B(`uF+P&Vw~;pFF<t-Fh#dO>)J_+#i2Voae2U-u*1>v&i?g
zd$U==QOO-S-ys<zH71~?Mow7a1y3q0z$D@g#2^|isbEispj6q3W-P2+HG-6@K|r%W
zI0T!Spt2cMu9hL00ZM}~ry;V4AyB&y5QAtmr$JINqY&8Zca>X~K}<kS#mMG^Qn3hf
zp$aKZF|8VY={Ms$DE-c>vUHQoab7$pJG=Y7<Nr?E>uaa%|MsrRE5=}9v;U)6Sw5-d
zd&2Kq?|mS9-MH5O>$)AiX8+eJH|5Jt6q`P4%CAS|f5nfT){Z;3<!))=(kmj2M~@wk
z{k_Wm)Y|=*C;k87dMx{z-o7(Zv&+?)i!BeE&wqF|V}0DV@^vrcSH^k1zdrk&&IDtV
z^7<3Wi$aU{h2{9ZYMpsnXX!uISc|)&ph#oq_?dLa7Lk4#nM9azSK*)}2m)^%K_syi
zI>by^m5y8!gNjiYSkmYMHxql6i=~1O@J2NpxoQBFo-nYaaXFAle$@`Lhl?3^MT4-X
z7?|QA0Rtit2_0{>gYX2X3P7$LP($bfis6{G0G4V8IfOulBNtbwA!NpkBZM&R!IE2%
zLkL^phX^4?R%i$jR}>;rB(}m2X6#$X2&l2d6orWJ1{GGw#TckCgn=cEb6{q|or<Su
sL=I_?vB((<R0hJplEyzkW8rC$gc32pn-$cCW#9tB`M}sZ$PVHG0Lxu~G5`Po

literal 0
HcmV?d00001


From 9b5be12326e246b4133806cf7b991be71fd139c3 Mon Sep 17 00:00:00 2001
From: ok-nick <nick.libraries@gmail.com>
Date: Tue, 9 Jul 2024 15:32:20 -0400
Subject: [PATCH 08/21] Update memchr dep in make_test_images

---
 make_test_images/Cargo.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/make_test_images/Cargo.toml b/make_test_images/Cargo.toml
index 1ae12fa1f..06f53e559 100644
--- a/make_test_images/Cargo.toml
+++ b/make_test_images/Cargo.toml
@@ -18,7 +18,7 @@ image = { version = "0.24.7", default-features = false, features = [
 	"jpeg",
 	"png",
 ] }
-memchr = "2.7.1"
+memchr = "2.7.4"
 nom = "7.1.3"
 regex = "1.5.6"
 serde = "1.0.197"

From f57b83261d107a2ef94ef138d6d6305ddb63a679 Mon Sep 17 00:00:00 2001
From: ok-nick <nick.libraries@gmail.com>
Date: Tue, 9 Jul 2024 16:17:08 -0400
Subject: [PATCH 09/21] Add ZIP unit tests

---
 sdk/src/asset_handlers/zip_io.rs | 129 +++++++++++++++++++++++++------
 1 file changed, 104 insertions(+), 25 deletions(-)

diff --git a/sdk/src/asset_handlers/zip_io.rs b/sdk/src/asset_handlers/zip_io.rs
index cff59f566..924d8ade5 100644
--- a/sdk/src/asset_handlers/zip_io.rs
+++ b/sdk/src/asset_handlers/zip_io.rs
@@ -5,7 +5,11 @@ use std::{
 };
 
 use tempfile::Builder;
-use zip::{result::ZipResult, write::SimpleFileOptions, CompressionMethod, ZipArchive, ZipWriter};
+use zip::{
+    result::{ZipError, ZipResult},
+    write::SimpleFileOptions,
+    CompressionMethod, ZipArchive, ZipWriter,
+};
 
 use crate::{
     assertions::UriHashedDataMap,
@@ -30,17 +34,30 @@ impl CAIWriter for ZipIO {
             .writer(input_stream, output_stream)
             .map_err(|_| Error::EmbeddingError)?;
 
-        // TODO: what happens if the dir exists?
-        writer
-            .add_directory("META-INF", SimpleFileOptions::default())
-            .map_err(|_| Error::EmbeddingError)?;
+        match writer.add_directory("META-INF", SimpleFileOptions::default()) {
+            Err(ZipError::InvalidArchive("Duplicate filename")) => {}
+            Err(_) => return Err(Error::EmbeddingError),
+            _ => {}
+        }
+
+        match writer.start_file_from_path(
+            Path::new("META-INF/content_credential.c2pa"),
+            SimpleFileOptions::default().compression_method(CompressionMethod::Stored),
+        ) {
+            Err(ZipError::InvalidArchive("Duplicate filename")) => {
+                writer.abort_file().map_err(|_| Error::EmbeddingError)?;
+                // TODO: remove code duplication
+                writer
+                    .start_file_from_path(
+                        Path::new("META-INF/content_credential.c2pa"),
+                        SimpleFileOptions::default().compression_method(CompressionMethod::Stored),
+                    )
+                    .map_err(|_| Error::EmbeddingError)?;
+            }
+            Err(_) => return Err(Error::EmbeddingError),
+            _ => {}
+        }
 
-        writer
-            .start_file_from_path(
-                Path::new("META-INF/content_credential.c2pa"),
-                SimpleFileOptions::default().compression_method(CompressionMethod::Stored),
-            )
-            .map_err(|_| Error::EmbeddingError)?;
         io::copy(&mut store_bytes, &mut writer)?;
         writer.finish().map_err(|_| Error::EmbeddingError)?;
 
@@ -64,12 +81,14 @@ impl CAIWriter for ZipIO {
             .writer(input_stream, output_stream)
             .map_err(|_| Error::EmbeddingError)?;
 
-        writer
-            .start_file_from_path(
-                Path::new("META-INF/content_credential.c2pa"),
-                SimpleFileOptions::default(),
-            )
-            .map_err(|_| Error::EmbeddingError)?;
+        match writer.start_file_from_path(
+            Path::new("META-INF/content_credential.c2pa"),
+            SimpleFileOptions::default(),
+        ) {
+            Err(ZipError::InvalidArchive("Duplicate filename")) => {}
+            Err(_) => return Err(Error::EmbeddingError),
+            _ => {}
+        }
         writer.abort_file().map_err(|_| Error::EmbeddingError)?;
         writer.finish().map_err(|_| Error::EmbeddingError)?;
 
@@ -221,15 +240,12 @@ impl ZipIO {
         input_stream: &'a mut dyn CAIRead,
         output_stream: &'a mut dyn CAIReadWrite,
     ) -> ZipResult<ZipWriter<CAIReadWriteWrapper<'a>>> {
-        let mut writer = ZipWriter::new_append(CAIReadWriteWrapper {
-            reader_writer: output_stream,
-        })?;
+        input_stream.rewind()?;
+        io::copy(input_stream, output_stream)?;
 
-        writer.merge_archive(ZipArchive::new(CAIReadWrapper {
-            reader: input_stream,
-        })?)?;
-
-        Ok(writer)
+        ZipWriter::new_append(CAIReadWriteWrapper {
+            reader_writer: output_stream,
+        })
     }
 }
 
@@ -302,3 +318,66 @@ where
 
     Ok(ranges)
 }
+
+#[cfg(test)]
+mod tests {
+    use io::{Cursor, Seek};
+
+    use super::*;
+
+    const SAMPLE1: &[u8] = include_bytes!("../../tests/fixtures/sample1.zip");
+
+    #[test]
+    fn test_write_bytes() -> Result<()> {
+        let mut stream = Cursor::new(SAMPLE1);
+
+        let zip_io = ZipIO {};
+
+        assert!(matches!(
+            zip_io.read_cai(&mut stream),
+            Err(Error::JumbfNotFound)
+        ));
+
+        let mut output_stream = Cursor::new(Vec::with_capacity(SAMPLE1.len() + 15 + 7));
+        let random_bytes = [1, 2, 3, 4, 3, 2, 1];
+        zip_io.write_cai(&mut stream, &mut output_stream, &random_bytes)?;
+
+        let data_written = zip_io.read_cai(&mut output_stream)?;
+        assert_eq!(data_written, random_bytes);
+
+        Ok(())
+    }
+
+    #[test]
+    fn test_write_bytes_replace() -> Result<()> {
+        let mut stream = Cursor::new(SAMPLE1);
+
+        let zip_io = ZipIO {};
+
+        assert!(matches!(
+            zip_io.read_cai(&mut stream),
+            Err(Error::JumbfNotFound)
+        ));
+
+        let mut output_stream1 = Cursor::new(Vec::with_capacity(SAMPLE1.len() + 15 + 7));
+        let random_bytes = [1, 2, 3, 4, 3, 2, 1];
+        zip_io.write_cai(&mut stream, &mut output_stream1, &random_bytes)?;
+
+        let data_written = zip_io.read_cai(&mut output_stream1)?;
+        assert_eq!(data_written, random_bytes);
+
+        let mut output_stream2 = Cursor::new(Vec::with_capacity(SAMPLE1.len() + 15 + 5));
+        let random_bytes = [3, 2, 1, 2, 3];
+        zip_io.write_cai(&mut output_stream1, &mut output_stream2, &random_bytes)?;
+
+        let data_written = zip_io.read_cai(&mut output_stream2)?;
+        assert_eq!(data_written, random_bytes);
+
+        let mut bytes = Vec::new();
+        stream.rewind()?;
+        stream.read_to_end(&mut bytes)?;
+        assert_eq!(SAMPLE1, bytes);
+
+        Ok(())
+    }
+}

From 770045354d7f5325edaf060167c8b50bc8ef8e57 Mon Sep 17 00:00:00 2001
From: ok-nick <nick.libraries@gmail.com>
Date: Wed, 10 Jul 2024 09:36:09 -0400
Subject: [PATCH 10/21] Collection Hash Assertion relative path validation

---
 sdk/src/assertions/collection_hash.rs | 27 ++++++++++++++++++++++++---
 sdk/src/asset_handlers/zip_io.rs      |  6 +++---
 2 files changed, 27 insertions(+), 6 deletions(-)

diff --git a/sdk/src/assertions/collection_hash.rs b/sdk/src/assertions/collection_hash.rs
index 87a315d5c..c78ce70f3 100644
--- a/sdk/src/assertions/collection_hash.rs
+++ b/sdk/src/assertions/collection_hash.rs
@@ -1,7 +1,7 @@
 use std::{
     fs::File,
     io::{Read, Seek},
-    path::{Path, PathBuf},
+    path::{Component, Path, PathBuf},
 };
 
 use serde::{Deserialize, Serialize};
@@ -52,12 +52,13 @@ impl CollectionHash {
         self.uris.push(uri_map);
     }
 
-    // TODO: is it safe to assume self.uris includes the stream that's being embedded into? or should
-    //       we pass it as a param?
+    // The base path MUST be the folder of the manifest. A URI MUST NOT reference a path outside of that folder.
     pub fn gen_hash<R>(&mut self, base_path: &Path) -> Result<()>
     where
         R: Read + Seek + ?Sized,
     {
+        self.validate_paths()?;
+
         let alg = self.alg().to_owned();
         for uri_map in &mut self.uris {
             let path = base_path.join(&uri_map.uri);
@@ -81,6 +82,8 @@ impl CollectionHash {
     where
         R: Read + Seek + ?Sized,
     {
+        self.validate_paths()?;
+
         let alg = alg.unwrap_or_else(|| self.alg());
         for uri_map in &self.uris {
             let path = base_path.join(&uri_map.uri);
@@ -183,4 +186,22 @@ impl CollectionHash {
     fn alg(&self) -> &str {
         self.alg.as_deref().unwrap_or("sha256")
     }
+
+    /// Ensures every URI stays inside the base folder it is later joined onto.
+    ///
+    /// Only plain name components are accepted: `.`/`..` are rejected, and so are root/prefix
+    /// components, because `Path::join` discards the base path when given an absolute path.
+    fn validate_paths(&self) -> Result<()> {
+        for uri_map in &self.uris {
+            let mut components = uri_map.uri.components();
+            if !components.all(|component| matches!(component, Component::Normal(_))) {
+                return Err(Error::BadParam(format!(
+                    "URI `{}` must be relative and must not contain `.` nor `..`",
+                    uri_map.uri.display()
+                )));
+            }
+        }
+
+        Ok(())
+    }
 }
diff --git a/sdk/src/asset_handlers/zip_io.rs b/sdk/src/asset_handlers/zip_io.rs
index 924d8ade5..3d9260595 100644
--- a/sdk/src/asset_handlers/zip_io.rs
+++ b/sdk/src/asset_handlers/zip_io.rs
@@ -338,7 +338,7 @@ mod tests {
             Err(Error::JumbfNotFound)
         ));
 
-        let mut output_stream = Cursor::new(Vec::with_capacity(SAMPLE1.len() + 15 + 7));
+        let mut output_stream = Cursor::new(Vec::with_capacity(SAMPLE1.len() + 7));
         let random_bytes = [1, 2, 3, 4, 3, 2, 1];
         zip_io.write_cai(&mut stream, &mut output_stream, &random_bytes)?;
 
@@ -359,14 +359,14 @@ mod tests {
             Err(Error::JumbfNotFound)
         ));
 
-        let mut output_stream1 = Cursor::new(Vec::with_capacity(SAMPLE1.len() + 15 + 7));
+        let mut output_stream1 = Cursor::new(Vec::with_capacity(SAMPLE1.len() + 7));
         let random_bytes = [1, 2, 3, 4, 3, 2, 1];
         zip_io.write_cai(&mut stream, &mut output_stream1, &random_bytes)?;
 
         let data_written = zip_io.read_cai(&mut output_stream1)?;
         assert_eq!(data_written, random_bytes);
 
-        let mut output_stream2 = Cursor::new(Vec::with_capacity(SAMPLE1.len() + 15 + 5));
+        let mut output_stream2 = Cursor::new(Vec::with_capacity(SAMPLE1.len() + 5));
         let random_bytes = [3, 2, 1, 2, 3];
         zip_io.write_cai(&mut output_stream1, &mut output_stream2, &random_bytes)?;
 

From 799481772f61c3db3ad8784fe6507de1d55425e6 Mon Sep 17 00:00:00 2001
From: ok-nick <nick.libraries@gmail.com>
Date: Wed, 10 Jul 2024 12:44:48 -0400
Subject: [PATCH 11/21] Add collection hash unit tests

---
 sdk/src/assertions/collection_hash.rs |  95 ++++++++++++++++++++++++--
 sdk/tests/fixtures/sample1.zip        | Bin 2934 -> 1096 bytes
 2 files changed, 88 insertions(+), 7 deletions(-)

diff --git a/sdk/src/assertions/collection_hash.rs b/sdk/src/assertions/collection_hash.rs
index c78ce70f3..84a6e712e 100644
--- a/sdk/src/assertions/collection_hash.rs
+++ b/sdk/src/assertions/collection_hash.rs
@@ -11,7 +11,7 @@ use crate::{
     hash_utils::verify_stream_by_alg, Error, HashRange, Result,
 };
 
-#[derive(Serialize, Deserialize, Debug, PartialEq, Eq)]
+#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Default)]
 pub struct CollectionHash {
     pub uris: Vec<UriHashedDataMap>,
 
@@ -40,12 +40,8 @@ pub struct UriHashedDataMap {
 }
 
 impl CollectionHash {
-    pub fn new(alg: String) -> Self {
-        CollectionHash {
-            uris: Vec::new(),
-            alg: Some(alg),
-            zip_central_directory_hash: None,
-        }
+    pub fn new() -> Self {
+        Self::default()
     }
 
     pub fn add_uri_map(&mut self, uri_map: UriHashedDataMap) {
@@ -205,3 +201,88 @@ impl CollectionHash {
         Ok(())
     }
 }
+
+#[cfg(test)]
+mod tests {
+    use std::io::Cursor;
+
+    use super::*;
+
+    const ZIP_SAMPLE1: &[u8] = include_bytes!("../../tests/fixtures/sample1.zip");
+
+    #[test]
+    fn test_zip_uri_gen() -> Result<()> {
+        let mut stream = Cursor::new(ZIP_SAMPLE1);
+
+        let mut collection = CollectionHash::new();
+        collection.gen_uris_from_zip_stream(&mut stream)?;
+
+        assert_eq!(
+            collection.uris.first(),
+            Some(&UriHashedDataMap {
+                uri: PathBuf::from("sample1/test1.txt"),
+                hash: Vec::new(),
+                size: Some(44),
+                dc_format: None,
+                data_types: None
+            })
+        );
+        assert_eq!(
+            collection.uris.get(1),
+            Some(&UriHashedDataMap {
+                uri: PathBuf::from("sample1/test1/test1.txt"),
+                hash: Vec::new(),
+                size: Some(87),
+                dc_format: None,
+                data_types: None
+            })
+        );
+        assert_eq!(
+            collection.uris.get(2),
+            Some(&UriHashedDataMap {
+                uri: PathBuf::from("sample1/test1/test2.txt"),
+                hash: Vec::new(),
+                size: Some(148),
+                dc_format: None,
+                data_types: None
+            })
+        );
+        assert_eq!(
+            collection.uris.get(3),
+            Some(&UriHashedDataMap {
+                uri: PathBuf::from("sample1/test1/test3.txt"),
+                hash: Vec::new(),
+                size: Some(186),
+                dc_format: None,
+                data_types: None
+            })
+        );
+        assert_eq!(
+            collection.uris.get(4),
+            Some(&UriHashedDataMap {
+                uri: PathBuf::from("sample1/test2.txt"),
+                hash: Vec::new(),
+                size: Some(304),
+                dc_format: None,
+                data_types: None
+            })
+        );
+        assert_eq!(collection.uris.len(), 5);
+
+        Ok(())
+    }
+
+    #[test]
+    fn test_zip_hash_gen() -> Result<()> {
+        let mut stream = Cursor::new(ZIP_SAMPLE1);
+
+        // TODO: blocked by zip_io::central_directory_inclusions
+        // let mut collection = CollectionHash::new();
+        // collection.gen_uris_from_zip_stream(&mut stream)?;
+        // collection.gen_hash_from_zip_stream(&mut stream)?;
+
+        // TODO: assert central dir hash + uri map hashes
+
+        Ok(())
+    }
+}
diff --git a/sdk/tests/fixtures/sample1.zip b/sdk/tests/fixtures/sample1.zip
index 29dec9efab522515c1df0df5b65f4b12f2b2a852..18c5dd36e60e4ed024568c3128c220b72fe426f7 100644
GIT binary patch
literal 1096
zcmWIWW@Zs#W&ncL>X#898V2})jN-)Hf}B)C{gTw;5<~p}pjs}l+5|MUg6L}XN-9ba
z%8P)?dFtwKumF{TusFJMhy@@`Ae#^xGtlh8uF(k9Hjqb{zdEw>18o6e?3#>Wwkd$D
zgb*N$A%W3}W-)qT7$F5lIZ*9g<(6ffK)XN~Jutw|gIf~d&B$cWj4Q}hfcil|py90}
zh(?V7E(QgVAOj<V1Vh8CAVsrFw-{kG1DlWaGaw&gKCWnjnXk~Wq)`WG5{mht5J9$r
zA<F27DaZ=(7A25X*sQ>eT4XDt(XBv4G>Szn%XyJ4z-AF<Qb4w73Ra82A&+9wq1}vN
zi}wE)MYa$*&@ht^vPCDcS_Do^$QGf;3aXEAr7@UApxEbSM2#IIePr_)OgE^3JpiMz
y#6D)agqaVDeM@xn(PO{i9E&i>iiSB)zzGZC1y(j-ATY24;Xx(_hC-l03=9DOg!?K0

literal 2934
zcmWIWW@Zs#0D+e3ml0qFlwe~}U?@o~E-};(4dG>A_w4UY1K~&@F0J5ZU}X8u$iM*B
z8vr(ggMovg2+0f<pgH_-Gr%V4l~j~~4buP`;s?Ya8qF|mPhCAmh5&DN4xYOD8*D&B
zKnCHkf{B4a3TQ=qysx9Pe{h68#Jzg)*lkP3v@JP7G&wyXDdB^!PuK_ku#SKRrU??v
z>H^Y+1_p6VmmKVbxtRkG7^<jn9AbKz5W;9Aw?MP8^Rux*hx+s9uT;fEo-BFQx+HAM
zj%TXRm!~{i+C0@zikXv<jZL#<)|?}cCS7`TWy+KzlR}<0By<$^oO3wDoFU^Bo0n9?
z=dw=PBgDvHhVsd!(^Qzgsz;^%-t|L8ZRxWk;UP=XRi=kbTee(m*3zXjmducNDxyA3
zZ1(f1Os3PPEqNIgAuTmqLULAg)Ql5fzb|X#2|MQ-$bS2R#`9aZ9+h9HPdZ<!{dE1_
z-SdRReO^2W);TV`*)vR>f6kq66?b2LJG++Mx9+-`yS&bcUzN4tk4qP?)nAvliDPs8
zmgA2CYn<n~eZR%$xln(7w)v@Rb}R1}dfR0s{=bu3C3&xHvj3ZBALoh-?kG*V@`gKj
z*@oQnljhi;uHy~=dg|Eh`tbE99@R~l@;%{D`xn*jX*;7d<>Jg`!Et$T%}dS{P<jEz
z7$`2efQf~HAp<G70J&(%#R#5UM1T=z2E-s5ExCXa3P>vqAQB34dgcd8qoze;xZ$8g
z4N8k38qIJ}S`5@^6b5^N`Ku#4C)gb1<cMqw$a6T8Bc_GJFF8hh10~0MI`ZjU+ZG0%
z+q(7W`vw1x%zGVnD!#hhR#k7tg@eoM&Av5Go5sX1vhT*8y(^}cA5Wj_acABfU+cTk
zN!lsbZ*GveynkcU%hQjUUJBjxo&E3Mlf64%YJaZ$x9WoC`J~GGM_kxCbhWqc`?|nj
zf41f9u=S5#wSRS&6K}B(`uF+P&Vw~;pFF<t-Fh#dO>)J_+#i2Voae2U-u*1>v&i?g
zd$U==QOO-S-ys<zH71~?Mow7a1y3q0z$D@g#2^|isbEispj6q3W-P2+HG-6@K|r%W
zI0T!Spt2cMu9hL00ZM}~ry;V4AyB&y5QAtmr$JINqY&8Zca>X~K}<kS#mMG^Qn3hf
zp$aKZF|8VY={Ms$DE-c>vUHQoab7$pJG=Y7<Nr?E>uaa%|MsrRE5=}9v;U)6Sw5-d
zd&2Kq?|mS9-MH5O>$)AiX8+eJH|5Jt6q`P4%CAS|f5nfT){Z;3<!))=(kmj2M~@wk
z{k_Wm)Y|=*C;k87dMx{z-o7(Zv&+?)i!BeE&wqF|V}0DV@^vrcSH^k1zdrk&&IDtV
z^7<3Wi$aU{h2{9ZYMpsnXX!uISc|)&ph#oq_?dLa7Lk4#nM9azSK*)}2m)^%K_syi
zI>by^m5y8!gNjiYSkmYMHxql6i=~1O@J2NpxoQBFo-nYaaXFAle$@`Lhl?3^MT4-X
z7?|QA0Rtit2_0{>gYX2X3P7$LP($bfis6{G0G4V8IfOulBNtbwA!NpkBZM&R!IE2%
zLkL^phX^4?R%i$jR}>;rB(}m2X6#$X2&l2d6orWJ1{GGw#TckCgn=cEb6{q|or<Su
sL=I_?vB((<R0hJplEyzkW8rC$gc32pn-$cCW#9tB`M}sZ$PVHG0Lxu~G5`Po


From 484cca821473b1e3c291a1dc4b988ba55bf69fa2 Mon Sep 17 00:00:00 2001
From: ok-nick <nick.libraries@gmail.com>
Date: Wed, 10 Jul 2024 13:13:08 -0400
Subject: [PATCH 12/21] Pass CI for collection hash

---
 sdk/src/assertions/collection_hash.rs | 2 +-
 sdk/src/asset_handlers/zip_io.rs      | 4 +---
 2 files changed, 2 insertions(+), 4 deletions(-)

diff --git a/sdk/src/assertions/collection_hash.rs b/sdk/src/assertions/collection_hash.rs
index 84a6e712e..3bf26caf5 100644
--- a/sdk/src/assertions/collection_hash.rs
+++ b/sdk/src/assertions/collection_hash.rs
@@ -274,7 +274,7 @@ mod tests {
 
     #[test]
     fn test_zip_hash_gen() -> Result<()> {
-        let mut stream = Cursor::new(ZIP_SAMPLE1);
+        // let mut stream = Cursor::new(ZIP_SAMPLE1);
 
         // TODO: blocked by zip_io::central_directory_inclusions
         // let mut collection = CollectionHash::new();
diff --git a/sdk/src/asset_handlers/zip_io.rs b/sdk/src/asset_handlers/zip_io.rs
index 3d9260595..d185bf9b3 100644
--- a/sdk/src/asset_handlers/zip_io.rs
+++ b/sdk/src/asset_handlers/zip_io.rs
@@ -256,9 +256,7 @@ where
 {
     let _reader = ZipArchive::new(reader).map_err(|_| Error::JumbfNotFound)?;
 
-    // TODO: https://github.com/zip-rs/zip2/pull/71
-    //       or
-    //       https://gitlab.com/xMAC94x/zip-core (https://github.com/zip-rs/zip2/issues/204)
+    // TODO: https://github.com/zip-rs/zip2/issues/209
 
     todo!()
 }

From 32fc67487ceb0a6c7b84751af36e16b1711afcff Mon Sep 17 00:00:00 2001
From: ok-nick <nick.libraries@gmail.com>
Date: Wed, 10 Jul 2024 13:28:28 -0400
Subject: [PATCH 13/21] Fix ZIP offsets/lens

---
 sdk/src/assertions/collection_hash.rs | 10 +++++-----
 sdk/src/asset_handlers/zip_io.rs      | 13 ++++++-------
 2 files changed, 11 insertions(+), 12 deletions(-)

diff --git a/sdk/src/assertions/collection_hash.rs b/sdk/src/assertions/collection_hash.rs
index 3bf26caf5..44f0cf76e 100644
--- a/sdk/src/assertions/collection_hash.rs
+++ b/sdk/src/assertions/collection_hash.rs
@@ -222,7 +222,7 @@ mod tests {
             Some(&UriHashedDataMap {
                 uri: PathBuf::from("sample1/test1.txt"),
                 hash: Vec::new(),
-                size: Some(44),
+                size: Some(47),
                 dc_format: None,
                 data_types: None
             })
@@ -232,7 +232,7 @@ mod tests {
             Some(&UriHashedDataMap {
                 uri: PathBuf::from("sample1/test1/test1.txt"),
                 hash: Vec::new(),
-                size: Some(87),
+                size: Some(57),
                 dc_format: None,
                 data_types: None
             })
@@ -242,7 +242,7 @@ mod tests {
             Some(&UriHashedDataMap {
                 uri: PathBuf::from("sample1/test1/test2.txt"),
                 hash: Vec::new(),
-                size: Some(148),
+                size: Some(53),
                 dc_format: None,
                 data_types: None
             })
@@ -252,7 +252,7 @@ mod tests {
             Some(&UriHashedDataMap {
                 uri: PathBuf::from("sample1/test1/test3.txt"),
                 hash: Vec::new(),
-                size: Some(186),
+                size: Some(68),
                 dc_format: None,
                 data_types: None
             })
@@ -262,7 +262,7 @@ mod tests {
             Some(&UriHashedDataMap {
                 uri: PathBuf::from("sample1/test2.txt"),
                 hash: Vec::new(),
-                size: Some(304),
+                size: Some(56),
                 dc_format: None,
                 data_types: None
             })
diff --git a/sdk/src/asset_handlers/zip_io.rs b/sdk/src/asset_handlers/zip_io.rs
index d185bf9b3..5588c14e4 100644
--- a/sdk/src/asset_handlers/zip_io.rs
+++ b/sdk/src/asset_handlers/zip_io.rs
@@ -280,10 +280,9 @@ where
                 #[allow(clippy::unwrap_used)]
                 uri: file.enclosed_name().unwrap(),
                 hash: Vec::new(),
-                // TODO: same here
-                size: Some(file.header_start() - file.compressed_size()),
-                dc_format: None,  // TODO
-                data_types: None, // TODO
+                size: Some((file.data_start() + file.compressed_size()) - file.header_start()),
+                dc_format: None,
+                data_types: None,
             });
         }
     }
@@ -305,11 +304,11 @@ where
         let file = reader.by_index(index).map_err(|_| Error::JumbfNotFound)?;
 
         if !file.is_dir() {
-            // TODO: hash from header or data? does compressed_size include header?
-            //       and fix error type
+            // TODO: fix error type
             ranges.push(HashRange::new(
                 usize::try_from(file.header_start()).map_err(|_| Error::JumbfNotFound)?,
-                usize::try_from(file.compressed_size()).map_err(|_| Error::JumbfNotFound)?,
+                usize::try_from((file.data_start() + file.compressed_size()) - file.header_start())
+                    .map_err(|_| Error::JumbfNotFound)?,
             ));
         }
     }

From d66888834cbf360a0b121eac2a5b504a74e4e1b5 Mon Sep 17 00:00:00 2001
From: ok-nick <nick.libraries@gmail.com>
Date: Thu, 11 Jul 2024 16:43:21 -0400
Subject: [PATCH 14/21] Collection assertion docs, optimizations, and cleanup

---
 sdk/src/assertions/collection_hash.rs | 515 ++++++++++++++++++--------
 sdk/src/assertions/labels.rs          |   5 +
 sdk/src/asset_handlers/zip_io.rs      |  72 +---
 3 files changed, 368 insertions(+), 224 deletions(-)

diff --git a/sdk/src/assertions/collection_hash.rs b/sdk/src/assertions/collection_hash.rs
index 44f0cf76e..bab00d317 100644
--- a/sdk/src/assertions/collection_hash.rs
+++ b/sdk/src/assertions/collection_hash.rs
@@ -1,142 +1,229 @@
 use std::{
-    fs::File,
+    fs::{self, File},
     io::{Read, Seek},
     path::{Component, Path, PathBuf},
 };
 
 use serde::{Deserialize, Serialize};
+use zip::ZipArchive;
 
 use crate::{
-    assertions::AssetType, asset_handlers::zip_io, hash_stream_by_alg,
-    hash_utils::verify_stream_by_alg, Error, HashRange, Result,
+    assertion::{Assertion, AssertionBase, AssertionCbor},
+    assertions::{labels::COLLECTION_HASH, AssetType},
+    hash_stream_by_alg,
+    hash_utils::verify_stream_by_alg,
+    Error, HashRange, Result,
 };
 
-#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Default)]
+// TODO: which version?
+const ASSERTION_CREATION_VERSION: usize = 2;
+
+/// A collection hash is used to hash multiple files within a collection (e.g. a folder or a zip file).
+#[derive(Serialize, Deserialize, Debug, PartialEq, Eq)]
 pub struct CollectionHash {
+    /// List of files and their metadata to include in the collection hash.
     pub uris: Vec<UriHashedDataMap>,
 
+    /// Algorithm used to hash the files.
     #[serde(skip_serializing_if = "Option::is_none")]
     pub alg: Option<String>,
 
-    #[serde(skip_serializing_if = "Option::is_none", with = "serde_bytes")]
-    pub zip_central_directory_hash: Option<Vec<u8>>,
+    // Although this isn't explicitly defined in the spec, users MUST specify a base path when constructing
+    // a collection hash. You may notice that zips do not require this field, so we can make it optional,
+    // but that would mean users can optionally specify it, which isn't true.
+    //
+    /// This field represents the root directory where files must be contained within. If the path is a file, it
+    /// will default to using the file's parent. For more information, read [`CollectionHash::new`][CollectionHash::new].
+    pub base_path: PathBuf,
+
+    // The user would never need to explicitly specify this field; it's always recomputed internally.
+    #[serde(with = "serde_bytes", skip_serializing_if = "Option::is_none")]
+    zip_central_directory_hash: Option<Vec<u8>>,
+
+    #[serde(skip)]
+    zip_central_directory_hash_range: Option<HashRange>,
 }
 
+/// Information about a file in a [`CollectionHash`][CollectionHash].
 #[derive(Serialize, Deserialize, Debug, PartialEq, Eq)]
 pub struct UriHashedDataMap {
+    /// Path to the file included in the collection.
     pub uri: PathBuf,
 
-    #[serde(with = "serde_bytes")]
-    pub hash: Vec<u8>,
+    // Same as zip_central_directory_hash, this field is always recomputed, users would never need to specify it
+    // explicitly.
+    #[serde(with = "serde_bytes", skip_serializing_if = "Option::is_none")]
+    hash: Option<Vec<u8>>,
 
+    /// Size of the file in the collection.
     #[serde(skip_serializing_if = "Option::is_none")]
     pub size: Option<u64>,
 
+    /// Mime type of the file.
+    ///
+    /// Note that this field is specified as `dc:format` during serialization/deserialization.
     #[serde(rename = "dc:format", skip_serializing_if = "Option::is_none")]
     pub dc_format: Option<String>,
 
+    /// Additional information about the type of data in the file.
     #[serde(skip_serializing_if = "Option::is_none")]
     pub data_types: Option<Vec<AssetType>>,
+
+    #[serde(skip)]
+    zip_hash_range: Option<HashRange>,
 }
 
 impl CollectionHash {
-    pub fn new() -> Self {
-        Self::default()
+    pub const LABEL: &'static str = COLLECTION_HASH;
+
+    /// Create a new collection hash with the specified base path.
+    ///
+    /// A base path means that any path added to the collection will use the base path as the root. If the
+    /// added path is outside the scope of the base path, hashing will immediately result in an error.
+    ///
+    /// The base path may either be a file or a directory. However, if it s a file, it will use the parent
+    /// directory as the root.
+    pub fn new(base_path: PathBuf) -> Self {
+        Self {
+            uris: Vec::new(),
+            alg: None,
+            // TODO: if base_path is a file, then do .parent() or error?
+            base_path,
+            zip_central_directory_hash: None,
+            zip_central_directory_hash_range: None,
+        }
+    }
+
+    /// Create a new collection hash with the specified algorithm.
+    ///
+    /// For more details on base_path, read [`CollectionHash::new`][CollectionHash::new].
+    pub fn with_alg(base_path: PathBuf, alg: String) -> Self {
+        Self {
+            uris: Vec::new(),
+            alg: Some(alg),
+            base_path,
+            zip_central_directory_hash: None,
+            zip_central_directory_hash_range: None,
+        }
     }
 
-    pub fn add_uri_map(&mut self, uri_map: UriHashedDataMap) {
-        self.uris.push(uri_map);
+    /// Adds a new file to the collection hash.
+    ///
+    /// Note that the specified path MUST be a file, not a directory. It must also be within the scope of the
+    /// base_path. Read more on base_path in [`CollectionHash::new`][CollectionHash::new].
+    pub fn add_file(&mut self, path: PathBuf) -> Result<()> {
+        self.add_file_raw(path, None)
     }
 
-    // The base path MUST be the folder of the manifest. A URI MUST NOT reference a path outside of that folder.
-    pub fn gen_hash<R>(&mut self, base_path: &Path) -> Result<()>
+    /// Add a file with the specified data types.
+    ///
+    /// Read more on the constraints of these parameters in [`CollectionHash::add_file`][CollectionHash::add_file].
+    pub fn add_file_with_data_types(
+        &mut self,
+        path: PathBuf,
+        data_types: Vec<AssetType>,
+    ) -> Result<()> {
+        self.add_file_raw(path, Some(data_types))
+    }
+
+    /// Generate the hashes for the files in the collection.
+    pub fn gen_hash<R>(&mut self) -> Result<()>
     where
         R: Read + Seek + ?Sized,
     {
-        self.validate_paths()?;
-
         let alg = self.alg().to_owned();
         for uri_map in &mut self.uris {
-            let path = base_path.join(&uri_map.uri);
-            let mut file = File::open(path)?;
-            let file_len = file.metadata()?.len();
+            let path = &uri_map.uri;
+            Self::validate_path(path)?;
 
-            uri_map.hash = hash_stream_by_alg(
+            let mut file = File::open(path)?;
+            let file_len = match uri_map.size {
+                Some(file_len) => file_len,
+                None => file.metadata()?.len(),
+            };
+            uri_map.hash = Some(hash_stream_by_alg(
                 &alg,
                 &mut file,
                 // TODO: temp unwrap
                 #[allow(clippy::unwrap_used)]
                 Some(vec![HashRange::new(0, usize::try_from(file_len).unwrap())]),
                 false,
-            )?;
+            )?);
         }
 
         Ok(())
     }
 
-    pub fn verify_hash<R>(&self, alg: Option<&str>, base_path: &Path) -> Result<()>
+    /// Validate the hashes for the files in the collection.
+    pub fn verify_hash<R>(&self, alg: Option<&str>) -> Result<()>
     where
         R: Read + Seek + ?Sized,
     {
-        self.validate_paths()?;
-
         let alg = alg.unwrap_or_else(|| self.alg());
         for uri_map in &self.uris {
-            let path = base_path.join(&uri_map.uri);
-            let mut file = File::open(&path)?;
+            let path = &uri_map.uri;
+            Self::validate_path(path)?;
+
+            let mut file = File::open(path)?;
             let file_len = file.metadata()?.len();
 
-            if !verify_stream_by_alg(
-                alg,
-                &uri_map.hash,
-                &mut file,
-                // TODO: temp unwrap
-                #[allow(clippy::unwrap_used)]
-                Some(vec![HashRange::new(0, usize::try_from(file_len).unwrap())]),
-                false,
-            ) {
-                return Err(Error::HashMismatch(format!(
-                    "hash for {} does not match",
-                    path.display()
-                )));
+            match &uri_map.hash {
+                Some(hash) => {
+                    if !verify_stream_by_alg(
+                        alg,
+                        hash,
+                        &mut file,
+                        // TODO: temp unwrap
+                        #[allow(clippy::unwrap_used)]
+                        Some(vec![HashRange::new(0, usize::try_from(file_len).unwrap())]),
+                        false,
+                    ) {
+                        return Err(Error::HashMismatch(format!(
+                            "hash for {} does not match",
+                            path.display()
+                        )));
+                    }
+                }
+                None => todo!(),
             }
         }
 
         Ok(())
     }
 
-    // We overwrite all URIs with all existing URIs in the ZIP because all URIs in the ZIP represent all
-    // possible valid URIs — we don't want duplicates!
-    pub fn gen_uris_from_zip_stream<R>(&mut self, stream: &mut R) -> Result<()>
-    where
-        R: Read + Seek + ?Sized,
-    {
-        self.uris = zip_io::uri_maps(stream)?;
-        Ok(())
-    }
-
     pub fn gen_hash_from_zip_stream<R>(&mut self, stream: &mut R) -> Result<()>
     where
         R: Read + Seek + ?Sized,
     {
         let alg = self.alg().to_owned();
 
-        let zip_central_directory_inclusions = zip_io::central_directory_inclusions(stream)?;
-        let zip_central_directory_hash =
-            hash_stream_by_alg(&alg, stream, Some(zip_central_directory_inclusions), false)?;
+        let zip_central_directory_inclusions = zip_central_directory_range(stream)?;
+        let zip_central_directory_hash = hash_stream_by_alg(
+            &alg,
+            stream,
+            Some(vec![zip_central_directory_inclusions]),
+            false,
+        )?;
         if zip_central_directory_hash.is_empty() {
             return Err(Error::BadParam("could not generate data hash".to_string()));
         }
         self.zip_central_directory_hash = Some(zip_central_directory_hash);
 
-        let hash_ranges = zip_io::uri_inclusions(stream, &self.uris)?;
-        for (uri_map, hash_range) in self.uris.iter_mut().zip(hash_ranges) {
-            let hash = hash_stream_by_alg(&alg, stream, Some(vec![hash_range]), false)?;
+        self.uris = zip_uri_ranges(stream)?;
+        for uri_map in &mut self.uris {
+            let hash = hash_stream_by_alg(
+                &alg,
+                stream,
+                // We always generate the zip_hash_range in zip_uri_ranges.
+                #[allow(clippy::unwrap_used)]
+                Some(vec![uri_map.zip_hash_range.clone().unwrap()]),
+                false,
+            )?;
             if hash.is_empty() {
                 return Err(Error::BadParam("could not generate data hash".to_string()));
             }
 
-            uri_map.hash = hash;
+            uri_map.hash = Some(hash);
         }
 
         Ok(())
@@ -147,18 +234,19 @@ impl CollectionHash {
         R: Read + Seek + ?Sized,
     {
         let alg = alg.unwrap_or_else(|| self.alg());
-        let central_directory_hash = match &self.zip_central_directory_hash {
+        let zip_central_directory_hash = match &self.zip_central_directory_hash {
             Some(hash) => Ok(hash),
             None => Err(Error::BadParam(
                 "Missing zip central directory hash".to_owned(),
             )),
         }?;
-        let zip_central_directory_inclusions = zip_io::central_directory_inclusions(stream)?;
         if !verify_stream_by_alg(
             alg,
-            central_directory_hash,
+            zip_central_directory_hash,
             stream,
-            Some(zip_central_directory_inclusions),
+            // If zip_central_directory_hash exists (we checked above), then this must exist.
+            #[allow(clippy::unwrap_used)]
+            Some(vec![self.zip_central_directory_hash_range.clone().unwrap()]),
             false,
         ) {
             return Err(Error::HashMismatch(
@@ -166,35 +254,70 @@ impl CollectionHash {
             ));
         }
 
-        let hash_ranges = zip_io::uri_inclusions(stream, &self.uris)?;
-        for (uri_map, hash_range) in self.uris.iter().zip(hash_ranges) {
-            if !verify_stream_by_alg(alg, &uri_map.hash, stream, Some(vec![hash_range]), false) {
-                return Err(Error::HashMismatch(format!(
-                    "hash for {} does not match",
-                    uri_map.uri.display()
-                )));
+        for uri_map in &self.uris {
+            match &uri_map.hash {
+                Some(hash) => {
+                    if !verify_stream_by_alg(
+                        alg,
+                        hash,
+                        stream,
+                        // Same reason as above.
+                        #[allow(clippy::unwrap_used)]
+                        Some(vec![uri_map.zip_hash_range.clone().unwrap()]),
+                        false,
+                    ) {
+                        return Err(Error::HashMismatch(format!(
+                            "hash for {} does not match",
+                            uri_map.uri.display()
+                        )));
+                    }
+                }
+                None => todo!(),
             }
         }
 
         Ok(())
     }
 
+    fn add_file_raw(&mut self, path: PathBuf, data_types: Option<Vec<AssetType>>) -> Result<()> {
+        // TODO: how should we handle if the path already exists in the collection?
+        Self::validate_path(&path)?;
+
+        let format = crate::format_from_path(&path);
+        let metadata = fs::metadata(&path)?;
+        self.uris.push(UriHashedDataMap {
+            uri: self.base_path.join(path),
+            hash: None,
+            size: Some(metadata.len()),
+            dc_format: format,
+            data_types,
+            zip_hash_range: None,
+        });
+
+        Ok(())
+    }
+
     fn alg(&self) -> &str {
         self.alg.as_deref().unwrap_or("sha256")
     }
 
-    fn validate_paths(&self) -> Result<()> {
-        for uri_map in &self.uris {
-            for component in uri_map.uri.components() {
-                match component {
-                    Component::CurDir | Component::ParentDir => {
-                        return Err(Error::BadParam(format!(
-                            "URI `{}` must not contain relative components: `.` nor `..`",
-                            uri_map.uri.display()
-                        )));
-                    }
-                    _ => {}
+    fn validate_path(path: &Path) -> Result<()> {
+        if !path.is_file() {
+            return Err(Error::BadParam(format!(
+                "Collection hashes must only contain files; got `{}`",
+                path.display()
+            )));
+        }
+
+        for component in path.components() {
+            match component {
+                Component::CurDir | Component::ParentDir => {
+                    return Err(Error::BadParam(format!(
+                        "URI `{}` must not contain relative components: `.` nor `..`",
+                        path.display()
+                    )));
                 }
+                _ => {}
             }
         }
 
@@ -202,87 +325,171 @@ impl CollectionHash {
     }
 }
 
-#[cfg(test)]
-mod tests {
-    use std::io::Cursor;
-
-    use super::*;
-
-    const ZIP_SAMPLE1: &[u8] = include_bytes!("../../tests/fixtures/sample1.zip");
-
-    #[test]
-    fn test_zip_uri_gen() -> Result<()> {
-        let mut stream = Cursor::new(ZIP_SAMPLE1);
-
-        let mut collection = CollectionHash::new();
-        collection.gen_uris_from_zip_stream(&mut stream)?;
-
-        assert_eq!(
-            collection.uris.first(),
-            Some(&UriHashedDataMap {
-                uri: PathBuf::from("sample1/test1.txt"),
-                hash: Vec::new(),
-                size: Some(47),
-                dc_format: None,
-                data_types: None
-            })
-        );
-        assert_eq!(
-            collection.uris.get(1),
-            Some(&UriHashedDataMap {
-                uri: PathBuf::from("sample1/test1/test1.txt"),
-                hash: Vec::new(),
-                size: Some(57),
-                dc_format: None,
-                data_types: None
-            })
-        );
-        assert_eq!(
-            collection.uris.get(2),
-            Some(&UriHashedDataMap {
-                uri: PathBuf::from("sample1/test1/test2.txt"),
-                hash: Vec::new(),
-                size: Some(53),
-                dc_format: None,
-                data_types: None
-            })
-        );
-        assert_eq!(
-            collection.uris.get(3),
-            Some(&UriHashedDataMap {
-                uri: PathBuf::from("sample1/test1/test3.txt"),
-                hash: Vec::new(),
-                size: Some(68),
-                dc_format: None,
-                data_types: None
-            })
-        );
-        assert_eq!(
-            collection.uris.get(4),
-            Some(&UriHashedDataMap {
-                uri: PathBuf::from("sample1/test2.txt"),
-                hash: Vec::new(),
-                size: Some(56),
-                dc_format: None,
-                data_types: None
-            })
-        );
-        assert_eq!(collection.uris.len(), 5);
+impl AssertionBase for CollectionHash {
+    const LABEL: &'static str = Self::LABEL;
+    const VERSION: Option<usize> = Some(ASSERTION_CREATION_VERSION);
 
-        Ok(())
+    fn from_assertion(assertion: &Assertion) -> Result<Self> {
+        Self::from_cbor_assertion(assertion)
     }
 
-    #[test]
-    fn test_zip_hash_gen() -> Result<()> {
-        // let mut stream = Cursor::new(ZIP_SAMPLE1);
+    // We don't need to check if the zip_central_directory_hash exists, because if it is a zip
+    // and one of the uri map hashes doesn't exist, then that means the central dir hash doesn't exist.
+    fn to_assertion(&self) -> Result<Assertion> {
+        if self.uris.iter().any(|uri_map| uri_map.hash.is_none()) {
+            return Err(Error::BadParam(
+                "No hash found, ensure gen_hash is called".to_string(),
+            ));
+        }
 
-        // TODO: blocked by zip_io::central_directory_inclusions
-        // let mut collection = CollectionHash::new();
-        // collection.gen_uris_from_zip_stream(&mut stream)?;
-        // collection.gen_hash_from_zip_stream(&mut stream)?;
+        Self::to_cbor_assertion(self)
+    }
+}
 
-        // TODO: assert central dir hash + uri map hashes
+impl AssertionCbor for CollectionHash {}
 
-        Ok(())
+pub fn zip_central_directory_range<R>(reader: &mut R) -> Result<HashRange>
+where
+    R: Read + Seek + ?Sized,
+{
+    let _reader = ZipArchive::new(reader).map_err(|_| Error::JumbfNotFound)?;
+
+    // TODO: https://github.com/zip-rs/zip2/issues/209
+
+    todo!()
+}
+
+pub fn zip_uri_ranges<R>(stream: &mut R) -> Result<Vec<UriHashedDataMap>>
+where
+    R: Read + Seek + ?Sized,
+{
+    let mut reader = ZipArchive::new(stream).map_err(|_| Error::JumbfNotFound)?;
+
+    let mut uri_maps = Vec::new();
+    let file_names: Vec<String> = reader.file_names().map(|n| n.to_owned()).collect();
+    for file_name in file_names {
+        let file = reader
+            .by_name(&file_name)
+            .map_err(|_| Error::JumbfNotFound)?;
+
+        if !file.is_dir() {
+            match file.enclosed_name() {
+                Some(path) => {
+                    if path != Path::new("META-INF/content_credential.c2pa") {
+                        uri_maps.push(UriHashedDataMap {
+                            dc_format: crate::format_from_path(&path),
+                            uri: path,
+                            hash: Some(Vec::new()),
+                            size: Some(
+                                (file.data_start() + file.compressed_size()) - file.header_start(),
+                            ),
+                            data_types: None,
+                            // TODO: fix error types
+                            zip_hash_range: Some(HashRange::new(
+                                usize::try_from(file.header_start())
+                                    .map_err(|_| Error::JumbfNotFound)?,
+                                usize::try_from(
+                                    (file.data_start() + file.compressed_size())
+                                        - file.header_start(),
+                                )
+                                .map_err(|_| Error::JumbfNotFound)?,
+                            )),
+                        });
+                    }
+                }
+                None => todo!(),
+            }
+        }
     }
+
+    Ok(uri_maps)
 }
+
+// TODO: blocked by zip_central_directory_range
+// #[cfg(test)]
+// mod tests {
+//     use std::io::Cursor;
+
+//     use super::*;
+
+//     const ZIP_SAMPLE1: &[u8] = include_bytes!("../../tests/fixtures/sample1.zip");
+
+// #[test]
+// fn test_zip_hash() -> Result<()> {
+//     let mut stream = Cursor::new(ZIP_SAMPLE1);
+
+//     let mut collection = CollectionHash {
+//         uris: Vec::new(),
+//         alg: None,
+//         zip_central_directory_hash: None,
+//         base_path: PathBuf::new(),
+//         zip_central_directory_hash_range: None,
+//     };
+//     collection.gen_hash_from_zip_stream(&mut stream)?;
+
+//     assert_eq!(collection.zip_central_directory_hash, vec![0]);
+//     assert_eq!(
+//         collection.zip_central_directory_hash_range,
+//         Some(HashRange::new(0, 0))
+//     );
+
+//     assert_eq!(
+//         collection.uris.first(),
+//         Some(&UriHashedDataMap {
+//             uri: PathBuf::from("sample1/test1.txt"),
+//             hash: Some(vec![0]),
+//             size: Some(47),
+//             dc_format: None,
+//             data_types: None,
+//             zip_hash_range: None,
+//         })
+//     );
+//     assert_eq!(
+//         collection.uris.get(1),
+//         Some(&UriHashedDataMap {
+//             uri: PathBuf::from("sample1/test1/test1.txt"),
+//             hash: Some(vec![0]),
+//             size: Some(57),
+//             dc_format: None,
+//             data_types: None,
+//             zip_hash_range: None,
+//         })
+//     );
+//     assert_eq!(
+//         collection.uris.get(2),
+//         Some(&UriHashedDataMap {
+//             uri: PathBuf::from("sample1/test1/test2.txt"),
+//             hash: Some(vec![0]),
+//             size: Some(53),
+//             dc_format: None,
+//             data_types: None,
+//             zip_hash_range: None,
+//         })
+//     );
+//     assert_eq!(
+//         collection.uris.get(3),
+//         Some(&UriHashedDataMap {
+//             uri: PathBuf::from("sample1/test1/test3.txt"),
+//             hash: Some(vec![0]),
+//             size: Some(68),
+//             dc_format: None,
+//             data_types: None,
+//             zip_hash_range: None,
+//         })
+//     );
+//     assert_eq!(
+//         collection.uris.get(4),
+//         Some(&UriHashedDataMap {
+//             uri: PathBuf::from("sample1/test2.txt"),
+//             hash: Some(vec![0]),
+//             size: Some(56),
+//             dc_format: None,
+//             data_types: None,
+//             zip_hash_range: None,
+//         })
+//     );
+//     assert_eq!(collection.uris.len(), 5);
+
+//     Ok(())
+// }
+// }
diff --git a/sdk/src/assertions/labels.rs b/sdk/src/assertions/labels.rs
index c4c5990cb..c11b8cee8 100644
--- a/sdk/src/assertions/labels.rs
+++ b/sdk/src/assertions/labels.rs
@@ -39,6 +39,11 @@ pub const DATA_HASH: &str = "c2pa.hash.data";
 /// See <https://c2pa.org/specifications/specifications/1.3/specs/C2PA_Specification.html#_general_boxes_hash>.
 pub const BOX_HASH: &str = "c2pa.hash.boxes";
 
+/// Label prefix for a collection hash assertion.
+///
+/// See <https://c2pa.org/specifications/specifications/1.4/specs/C2PA_Specification.html#_collection_data_hash>.
+pub const COLLECTION_HASH: &str = "c2pa.hash.collection.data";
+
 /// Label prefix for a BMFF-based hash assertion.
 ///
 /// See <https://c2pa.org/specifications/specifications/1.0/specs/C2PA_Specification.html#_bmff_based_hash>.
diff --git a/sdk/src/asset_handlers/zip_io.rs b/sdk/src/asset_handlers/zip_io.rs
index 5588c14e4..8e27e921e 100644
--- a/sdk/src/asset_handlers/zip_io.rs
+++ b/sdk/src/asset_handlers/zip_io.rs
@@ -1,6 +1,6 @@
 use std::{
     fs::{self, File},
-    io::{self, Read, Seek},
+    io::{self, Read},
     path::Path,
 };
 
@@ -12,13 +12,12 @@ use zip::{
 };
 
 use crate::{
-    assertions::UriHashedDataMap,
     asset_io::{
         self, AssetIO, CAIReadWrapper, CAIReadWriteWrapper, CAIReader, CAIWriter,
         HashObjectPositions,
     },
     error::Result,
-    CAIRead, CAIReadWrite, Error, HashRange,
+    CAIRead, CAIReadWrite, Error,
 };
 
 pub struct ZipIO {}
@@ -249,73 +248,6 @@ impl ZipIO {
     }
 }
 
-// TODO: probably doesn't need to return a vec
-pub fn central_directory_inclusions<R>(reader: &mut R) -> Result<Vec<HashRange>>
-where
-    R: Read + Seek + ?Sized,
-{
-    let _reader = ZipArchive::new(reader).map_err(|_| Error::JumbfNotFound)?;
-
-    // TODO: https://github.com/zip-rs/zip2/issues/209
-
-    todo!()
-}
-
-pub fn uri_maps<R>(stream: &mut R) -> Result<Vec<UriHashedDataMap>>
-where
-    R: Read + Seek + ?Sized,
-{
-    let mut reader = ZipArchive::new(stream).map_err(|_| Error::JumbfNotFound)?;
-
-    let mut uri_maps = Vec::new();
-    let file_names: Vec<String> = reader.file_names().map(|n| n.to_owned()).collect();
-    for file_name in file_names {
-        let file = reader
-            .by_name(&file_name)
-            .map_err(|_| Error::JumbfNotFound)?;
-
-        if !file.is_dir() {
-            uri_maps.push(UriHashedDataMap {
-                // TODO: temp unwrap
-                #[allow(clippy::unwrap_used)]
-                uri: file.enclosed_name().unwrap(),
-                hash: Vec::new(),
-                size: Some((file.data_start() + file.compressed_size()) - file.header_start()),
-                dc_format: None,
-                data_types: None,
-            });
-        }
-    }
-
-    Ok(uri_maps)
-}
-
-pub fn uri_inclusions<R>(stream: &mut R, uri_maps: &[UriHashedDataMap]) -> Result<Vec<HashRange>>
-where
-    R: Read + Seek + ?Sized,
-{
-    let mut reader = ZipArchive::new(stream).map_err(|_| Error::JumbfNotFound)?;
-
-    let mut ranges = Vec::new();
-    for uri_map in uri_maps {
-        let index = reader
-            .index_for_path(&uri_map.uri)
-            .ok_or(Error::JumbfNotFound)?;
-        let file = reader.by_index(index).map_err(|_| Error::JumbfNotFound)?;
-
-        if !file.is_dir() {
-            // TODO: fix error type
-            ranges.push(HashRange::new(
-                usize::try_from(file.header_start()).map_err(|_| Error::JumbfNotFound)?,
-                usize::try_from((file.data_start() + file.compressed_size()) - file.header_start())
-                    .map_err(|_| Error::JumbfNotFound)?,
-            ));
-        }
-    }
-
-    Ok(ranges)
-}
-
 #[cfg(test)]
 mod tests {
     use io::{Cursor, Seek};

From 11bef802ae365959286f1c241c232841b3873def Mon Sep 17 00:00:00 2001
From: ok-nick <nick.libraries@gmail.com>
Date: Fri, 12 Jul 2024 09:28:08 -0400
Subject: [PATCH 15/21] Cleanup collection hash errors

---
 sdk/src/assertions/collection_hash.rs | 102 ++++++++++++++++----------
 1 file changed, 65 insertions(+), 37 deletions(-)

diff --git a/sdk/src/assertions/collection_hash.rs b/sdk/src/assertions/collection_hash.rs
index bab00d317..03148007f 100644
--- a/sdk/src/assertions/collection_hash.rs
+++ b/sdk/src/assertions/collection_hash.rs
@@ -83,28 +83,15 @@ impl CollectionHash {
     ///
     /// The base path may either be a file or a directory. However, if it s a file, it will use the parent
     /// directory as the root.
-    pub fn new(base_path: PathBuf) -> Self {
-        Self {
-            uris: Vec::new(),
-            alg: None,
-            // TODO: if base_path is a file, then do .parent() or error?
-            base_path,
-            zip_central_directory_hash: None,
-            zip_central_directory_hash_range: None,
-        }
+    pub fn new(base_path: PathBuf) -> Result<Self> {
+        Self::new_raw(base_path, None)
     }
 
     /// Create a new collection hash with the specified algorithm.
     ///
     /// For more details on base_path, read [`CollectionHash::new`][CollectionHash::new].
-    pub fn with_alg(base_path: PathBuf, alg: String) -> Self {
-        Self {
-            uris: Vec::new(),
-            alg: Some(alg),
-            base_path,
-            zip_central_directory_hash: None,
-            zip_central_directory_hash_range: None,
-        }
+    pub fn with_alg(base_path: PathBuf, alg: String) -> Result<Self> {
+        Self::new_raw(base_path, Some(alg))
     }
 
     /// Adds a new file to the collection hash.
@@ -144,9 +131,12 @@ impl CollectionHash {
             uri_map.hash = Some(hash_stream_by_alg(
                 &alg,
                 &mut file,
-                // TODO: temp unwrap
-                #[allow(clippy::unwrap_used)]
-                Some(vec![HashRange::new(0, usize::try_from(file_len).unwrap())]),
+                Some(vec![HashRange::new(
+                    0,
+                    usize::try_from(file_len).map_err(|_| {
+                        Error::BadParam(format!("Value {} out of usize range", file_len))
+                    })?,
+                )]),
                 false,
             )?);
         }
@@ -173,9 +163,12 @@ impl CollectionHash {
                         alg,
                         hash,
                         &mut file,
-                        // TODO: temp unwrap
-                        #[allow(clippy::unwrap_used)]
-                        Some(vec![HashRange::new(0, usize::try_from(file_len).unwrap())]),
+                        Some(vec![HashRange::new(
+                            0,
+                            usize::try_from(file_len).map_err(|_| {
+                                Error::BadParam(format!("Value {} out of usize range", file_len))
+                            })?,
+                        )]),
                         false,
                     ) {
                         return Err(Error::HashMismatch(format!(
@@ -184,7 +177,11 @@ impl CollectionHash {
                         )));
                     }
                 }
-                None => todo!(),
+                None => {
+                    return Err(Error::BadParam(
+                        "Must generate hashes before verifying".to_owned(),
+                    ));
+                }
             }
         }
 
@@ -272,13 +269,40 @@ impl CollectionHash {
                         )));
                     }
                 }
-                None => todo!(),
+                None => {
+                    return Err(Error::BadParam(
+                        "Must generate hashes before verifying".to_owned(),
+                    ));
+                }
             }
         }
 
         Ok(())
     }
 
+    fn new_raw(base_path: PathBuf, alg: Option<String>) -> Result<Self> {
+        let base_path = match base_path.is_file() {
+            true => match base_path.parent() {
+                Some(path) => path.to_path_buf(),
+                None => {
+                    return Err(Error::BadParam(
+                        "Base path must be a directory or a file with a parent directory"
+                            .to_owned(),
+                    ))
+                }
+            },
+            false => base_path,
+        };
+
+        Ok(Self {
+            uris: Vec::new(),
+            alg,
+            base_path,
+            zip_central_directory_hash: None,
+            zip_central_directory_hash_range: None,
+        })
+    }
+
     fn add_file_raw(&mut self, path: PathBuf, data_types: Option<Vec<AssetType>>) -> Result<()> {
         // TODO: how should we handle if the path already exists in the collection?
         Self::validate_path(&path)?;
@@ -376,28 +400,32 @@ where
             match file.enclosed_name() {
                 Some(path) => {
                     if path != Path::new("META-INF/content_credential.c2pa") {
+                        let start = file.header_start();
+                        let len =
+                            (file.data_start() + file.compressed_size()) - file.header_start();
                         uri_maps.push(UriHashedDataMap {
                             dc_format: crate::format_from_path(&path),
                             uri: path,
                             hash: Some(Vec::new()),
-                            size: Some(
-                                (file.data_start() + file.compressed_size()) - file.header_start(),
-                            ),
+                            size: Some(len),
                             data_types: None,
-                            // TODO: fix error types
                             zip_hash_range: Some(HashRange::new(
-                                usize::try_from(file.header_start())
-                                    .map_err(|_| Error::JumbfNotFound)?,
-                                usize::try_from(
-                                    (file.data_start() + file.compressed_size())
-                                        - file.header_start(),
-                                )
-                                .map_err(|_| Error::JumbfNotFound)?,
+                                usize::try_from(start).map_err(|_| {
+                                    Error::BadParam(format!("Value {} out of usize range", start))
+                                })?,
+                                usize::try_from(len).map_err(|_| {
+                                    Error::BadParam(format!("Value {} out of usize range", len))
+                                })?,
                             )),
                         });
                     }
                 }
-                None => todo!(),
+                None => {
+                    return Err(Error::BadParam(format!(
+                        "Invalid stored path `{}` in zip file",
+                        file_name
+                    )))
+                }
             }
         }
     }

From d383ce81b5cd178a055801b3495cf273bcbbbba5 Mon Sep 17 00:00:00 2001
From: ok-nick <nick.libraries@gmail.com>
Date: Fri, 12 Jul 2024 11:52:27 -0400
Subject: [PATCH 16/21] Rework collection hash and add better validation

---
 sdk/src/assertions/collection_hash.rs | 162 +++++++++++++++-----------
 sdk/src/asset_handlers/zip_io.rs      |   1 +
 2 files changed, 93 insertions(+), 70 deletions(-)

diff --git a/sdk/src/assertions/collection_hash.rs b/sdk/src/assertions/collection_hash.rs
index 03148007f..595332438 100644
--- a/sdk/src/assertions/collection_hash.rs
+++ b/sdk/src/assertions/collection_hash.rs
@@ -1,4 +1,5 @@
 use std::{
+    collections::HashMap,
     fs::{self, File},
     io::{Read, Seek},
     path::{Component, Path, PathBuf},
@@ -15,30 +16,35 @@ use crate::{
     Error, HashRange, Result,
 };
 
-// TODO: which version?
-const ASSERTION_CREATION_VERSION: usize = 2;
+const ASSERTION_CREATION_VERSION: usize = 1;
 
 /// A collection hash is used to hash multiple files within a collection (e.g. a folder or a zip file).
 #[derive(Serialize, Deserialize, Debug, PartialEq, Eq)]
 pub struct CollectionHash {
-    /// List of files and their metadata to include in the collection hash.
-    pub uris: Vec<UriHashedDataMap>,
+    // We use a hash map to avoid potential duplicates.
+    //
+    /// Map of file path to their metadata for the collection.
+    pub uris: HashMap<PathBuf, UriHashedDataMap>,
 
     /// Algorithm used to hash the files.
     #[serde(skip_serializing_if = "Option::is_none")]
     pub alg: Option<String>,
 
-    // Although this isn't explicitly defined in the spec, user's MUST specify a base path when constructing
-    // a collection hash. You may notice that zips do not require this field, so we can make it optional,
-    // but that would mean users can optionally specify it, which isn't true.
+    // TODO: in c2patool, we need to redefine this field to also handle relative paths.
     //
     /// This field represents the root directory where files must be contained within. If the path is a file, it
     /// will default to using the file's parent. For more information, read [`CollectionHash::new`][CollectionHash::new].
-    pub base_path: PathBuf,
+    ///
+    /// While this field is marked as optional (it is not serialized as part of the spec), it is required for computing
+    /// hashes and MUST be specified.
+    #[serde(skip_serializing)]
+    pub base_path: Option<PathBuf>,
 
-    // The user would never need to explicilty specify this field, it's always recomputed internally.
+    /// Hash of the ZIP central directory.
+    ///
+    /// This field only needs to be specified if the collection hash is for a ZIP file.
     #[serde(with = "serde_bytes", skip_serializing_if = "Option::is_none")]
-    zip_central_directory_hash: Option<Vec<u8>>,
+    pub zip_central_directory_hash: Option<Vec<u8>>,
 
     #[serde(skip)]
     zip_central_directory_hash_range: Option<HashRange>,
@@ -47,15 +53,15 @@ pub struct CollectionHash {
 /// Information about a file in a [`CollectionHash`][CollectionHash].
 #[derive(Serialize, Deserialize, Debug, PartialEq, Eq)]
 pub struct UriHashedDataMap {
-    /// Path to the file included in the collection.
-    pub uri: PathBuf,
-
-    // Same as zip_central_directory_hash, this field is always recomputed, users would never need to specify it
-    // explicitly.
+    /// Hash of the entire file contents.
+    ///
+    /// For a ZIP, the hash must span starting from the file header to the end of the compressed file data.
     #[serde(with = "serde_bytes", skip_serializing_if = "Option::is_none")]
-    hash: Option<Vec<u8>>,
+    pub hash: Option<Vec<u8>>,
 
     /// Size of the file in the collection.
+    ///
+    /// For a ZIP, the size must span from the file header to the end of the compressed file data.
     #[serde(skip_serializing_if = "Option::is_none")]
     pub size: Option<u64>,
 
@@ -119,11 +125,13 @@ impl CollectionHash {
         R: Read + Seek + ?Sized,
     {
         let alg = self.alg().to_owned();
-        for uri_map in &mut self.uris {
-            let path = &uri_map.uri;
-            Self::validate_path(path)?;
+        let base_path = self.base_path()?.to_owned();
+
+        for (path, uri_map) in &mut self.uris {
+            let path = base_path.join(path);
+            Self::validate_path(&path)?;
 
-            let mut file = File::open(path)?;
+            let mut file = File::open(&path)?;
             let file_len = match uri_map.size {
                 Some(file_len) => file_len,
                 None => file.metadata()?.len(),
@@ -150,11 +158,13 @@ impl CollectionHash {
         R: Read + Seek + ?Sized,
     {
         let alg = alg.unwrap_or_else(|| self.alg());
-        for uri_map in &self.uris {
-            let path = &uri_map.uri;
-            Self::validate_path(path)?;
+        let base_path = self.base_path()?;
 
-            let mut file = File::open(path)?;
+        for (path, uri_map) in &self.uris {
+            let path = base_path.join(path);
+            Self::validate_path(&path)?;
+
+            let mut file = File::open(&path)?;
             let file_len = file.metadata()?.len();
 
             match &uri_map.hash {
@@ -207,7 +217,7 @@ impl CollectionHash {
         self.zip_central_directory_hash = Some(zip_central_directory_hash);
 
         self.uris = zip_uri_ranges(stream)?;
-        for uri_map in &mut self.uris {
+        for uri_map in self.uris.values_mut() {
             let hash = hash_stream_by_alg(
                 &alg,
                 stream,
@@ -251,7 +261,7 @@ impl CollectionHash {
             ));
         }
 
-        for uri_map in &self.uris {
+        for (path, uri_map) in &self.uris {
             match &uri_map.hash {
                 Some(hash) => {
                     if !verify_stream_by_alg(
@@ -265,7 +275,7 @@ impl CollectionHash {
                     ) {
                         return Err(Error::HashMismatch(format!(
                             "hash for {} does not match",
-                            uri_map.uri.display()
+                            path.display()
                         )));
                     }
                 }
@@ -281,42 +291,30 @@ impl CollectionHash {
     }
 
     fn new_raw(base_path: PathBuf, alg: Option<String>) -> Result<Self> {
-        let base_path = match base_path.is_file() {
-            true => match base_path.parent() {
-                Some(path) => path.to_path_buf(),
-                None => {
-                    return Err(Error::BadParam(
-                        "Base path must be a directory or a file with a parent directory"
-                            .to_owned(),
-                    ))
-                }
-            },
-            false => base_path,
-        };
-
         Ok(Self {
-            uris: Vec::new(),
+            uris: HashMap::new(),
             alg,
-            base_path,
+            base_path: Some(base_path),
             zip_central_directory_hash: None,
             zip_central_directory_hash_range: None,
         })
     }
 
     fn add_file_raw(&mut self, path: PathBuf, data_types: Option<Vec<AssetType>>) -> Result<()> {
-        // TODO: how should we handle if the path already exists in the collection?
         Self::validate_path(&path)?;
 
         let format = crate::format_from_path(&path);
         let metadata = fs::metadata(&path)?;
-        self.uris.push(UriHashedDataMap {
-            uri: self.base_path.join(path),
-            hash: None,
-            size: Some(metadata.len()),
-            dc_format: format,
-            data_types,
-            zip_hash_range: None,
-        });
+        self.uris.insert(
+            path,
+            UriHashedDataMap {
+                hash: None,
+                size: Some(metadata.len()),
+                dc_format: format,
+                data_types,
+                zip_hash_range: None,
+            },
+        );
 
         Ok(())
     }
@@ -325,6 +323,24 @@ impl CollectionHash {
         self.alg.as_deref().unwrap_or("sha256")
     }
 
+    fn base_path(&self) -> Result<&Path> {
+        match &self.base_path {
+            Some(base_path) => match base_path.is_file() {
+                true => match base_path.parent() {
+                    Some(path) => Ok(path),
+                    None => Err(Error::BadParam(
+                        "Base path must be a directory or a file with a parent directory"
+                            .to_owned(),
+                    )),
+                },
+                false => Ok(base_path),
+            },
+            None => Err(Error::BadParam(
+                "Must specify base path for collection hash".to_owned(),
+            )),
+        }
+    }
+
     fn validate_path(path: &Path) -> Result<()> {
         if !path.is_file() {
             return Err(Error::BadParam(format!(
@@ -360,7 +376,7 @@ impl AssertionBase for CollectionHash {
     // We don't need to check if the zip_central_directory_hash exists, because if it is a zip
     // and one of the uri maps hashes don't exist, then that means the central dir hash doesn't exist.
     fn to_assertion(&self) -> Result<Assertion> {
-        if self.uris.iter().any(|uri_map| uri_map.hash.is_none()) {
+        if self.uris.iter().any(|(_, uri_map)| uri_map.hash.is_none()) {
             return Err(Error::BadParam(
                 "No hash found, ensure gen_hash is called".to_string(),
             ));
@@ -383,13 +399,13 @@ where
     todo!()
 }
 
-pub fn zip_uri_ranges<R>(stream: &mut R) -> Result<Vec<UriHashedDataMap>>
+pub fn zip_uri_ranges<R>(stream: &mut R) -> Result<HashMap<PathBuf, UriHashedDataMap>>
 where
     R: Read + Seek + ?Sized,
 {
     let mut reader = ZipArchive::new(stream).map_err(|_| Error::JumbfNotFound)?;
 
-    let mut uri_maps = Vec::new();
+    let mut uri_map = HashMap::new();
     let file_names: Vec<String> = reader.file_names().map(|n| n.to_owned()).collect();
     for file_name in file_names {
         let file = reader
@@ -403,21 +419,27 @@ where
                         let start = file.header_start();
                         let len =
                             (file.data_start() + file.compressed_size()) - file.header_start();
-                        uri_maps.push(UriHashedDataMap {
-                            dc_format: crate::format_from_path(&path),
-                            uri: path,
-                            hash: Some(Vec::new()),
-                            size: Some(len),
-                            data_types: None,
-                            zip_hash_range: Some(HashRange::new(
-                                usize::try_from(start).map_err(|_| {
-                                    Error::BadParam(format!("Value {} out of usize range", start))
-                                })?,
-                                usize::try_from(len).map_err(|_| {
-                                    Error::BadParam(format!("Value {} out of usize range", len))
-                                })?,
-                            )),
-                        });
+                        let format = crate::format_from_path(&path);
+                        uri_map.insert(
+                            path,
+                            UriHashedDataMap {
+                                hash: Some(Vec::new()),
+                                size: Some(len),
+                                dc_format: format,
+                                data_types: None,
+                                zip_hash_range: Some(HashRange::new(
+                                    usize::try_from(start).map_err(|_| {
+                                        Error::BadParam(format!(
+                                            "Value {} out of usize range",
+                                            start
+                                        ))
+                                    })?,
+                                    usize::try_from(len).map_err(|_| {
+                                        Error::BadParam(format!("Value {} out of usize range", len))
+                                    })?,
+                                )),
+                            },
+                        );
                     }
                 }
                 None => {
@@ -430,7 +452,7 @@ where
         }
     }
 
-    Ok(uri_maps)
+    Ok(uri_map)
 }
 
 // TODO: blocked by central_directory_inclusions
diff --git a/sdk/src/asset_handlers/zip_io.rs b/sdk/src/asset_handlers/zip_io.rs
index 8e27e921e..745b74bd0 100644
--- a/sdk/src/asset_handlers/zip_io.rs
+++ b/sdk/src/asset_handlers/zip_io.rs
@@ -254,6 +254,7 @@ mod tests {
 
     use super::*;
 
+    // TODO: add office, epub, and other file types for testing
     const SAMPLE1: &[u8] = include_bytes!("../../tests/fixtures/sample1.zip");
 
     #[test]

From 744045d98551e376570df7694b99a5e2d33328ca Mon Sep 17 00:00:00 2001
From: ok-nick <nick.libraries@gmail.com>
Date: Fri, 12 Jul 2024 12:13:54 -0400
Subject: [PATCH 17/21] More file types for ZIP unit tests

---
 sdk/src/asset_handlers/zip_io.rs |  74 +++++++++++++++++--------------
 sdk/tests/fixtures/sample1.docx  | Bin 0 -> 12980 bytes
 sdk/tests/fixtures/sample1.odt   | Bin 0 -> 5651 bytes
 3 files changed, 41 insertions(+), 33 deletions(-)
 create mode 100644 sdk/tests/fixtures/sample1.docx
 create mode 100644 sdk/tests/fixtures/sample1.odt

diff --git a/sdk/src/asset_handlers/zip_io.rs b/sdk/src/asset_handlers/zip_io.rs
index 745b74bd0..9e24b0ba3 100644
--- a/sdk/src/asset_handlers/zip_io.rs
+++ b/sdk/src/asset_handlers/zip_io.rs
@@ -254,59 +254,67 @@ mod tests {
 
     use super::*;
 
-    // TODO: add office, epub, and other file types for testing
-    const SAMPLE1: &[u8] = include_bytes!("../../tests/fixtures/sample1.zip");
+    // TODO: add more sample file types
+    const SAMPLES: [&[u8]; 3] = [
+        include_bytes!("../../tests/fixtures/sample1.zip"),
+        include_bytes!("../../tests/fixtures/sample1.docx"),
+        include_bytes!("../../tests/fixtures/sample1.odt"),
+    ];
 
     #[test]
     fn test_write_bytes() -> Result<()> {
-        let mut stream = Cursor::new(SAMPLE1);
+        for sample in SAMPLES {
+            let mut stream = Cursor::new(sample);
 
-        let zip_io = ZipIO {};
+            let zip_io = ZipIO {};
 
-        assert!(matches!(
-            zip_io.read_cai(&mut stream),
-            Err(Error::JumbfNotFound)
-        ));
+            assert!(matches!(
+                zip_io.read_cai(&mut stream),
+                Err(Error::JumbfNotFound)
+            ));
 
-        let mut output_stream = Cursor::new(Vec::with_capacity(SAMPLE1.len() + 7));
-        let random_bytes = [1, 2, 3, 4, 3, 2, 1];
-        zip_io.write_cai(&mut stream, &mut output_stream, &random_bytes)?;
+            let mut output_stream = Cursor::new(Vec::with_capacity(sample.len() + 7));
+            let random_bytes = [1, 2, 3, 4, 3, 2, 1];
+            zip_io.write_cai(&mut stream, &mut output_stream, &random_bytes)?;
 
-        let data_written = zip_io.read_cai(&mut output_stream)?;
-        assert_eq!(data_written, random_bytes);
+            let data_written = zip_io.read_cai(&mut output_stream)?;
+            assert_eq!(data_written, random_bytes);
+        }
 
         Ok(())
     }
 
     #[test]
     fn test_write_bytes_replace() -> Result<()> {
-        let mut stream = Cursor::new(SAMPLE1);
+        for sample in SAMPLES {
+            let mut stream = Cursor::new(sample);
 
-        let zip_io = ZipIO {};
+            let zip_io = ZipIO {};
 
-        assert!(matches!(
-            zip_io.read_cai(&mut stream),
-            Err(Error::JumbfNotFound)
-        ));
+            assert!(matches!(
+                zip_io.read_cai(&mut stream),
+                Err(Error::JumbfNotFound)
+            ));
 
-        let mut output_stream1 = Cursor::new(Vec::with_capacity(SAMPLE1.len() + 7));
-        let random_bytes = [1, 2, 3, 4, 3, 2, 1];
-        zip_io.write_cai(&mut stream, &mut output_stream1, &random_bytes)?;
+            let mut output_stream1 = Cursor::new(Vec::with_capacity(sample.len() + 7));
+            let random_bytes = [1, 2, 3, 4, 3, 2, 1];
+            zip_io.write_cai(&mut stream, &mut output_stream1, &random_bytes)?;
 
-        let data_written = zip_io.read_cai(&mut output_stream1)?;
-        assert_eq!(data_written, random_bytes);
+            let data_written = zip_io.read_cai(&mut output_stream1)?;
+            assert_eq!(data_written, random_bytes);
 
-        let mut output_stream2 = Cursor::new(Vec::with_capacity(SAMPLE1.len() + 5));
-        let random_bytes = [3, 2, 1, 2, 3];
-        zip_io.write_cai(&mut output_stream1, &mut output_stream2, &random_bytes)?;
+            let mut output_stream2 = Cursor::new(Vec::with_capacity(sample.len() + 5));
+            let random_bytes = [3, 2, 1, 2, 3];
+            zip_io.write_cai(&mut output_stream1, &mut output_stream2, &random_bytes)?;
 
-        let data_written = zip_io.read_cai(&mut output_stream2)?;
-        assert_eq!(data_written, random_bytes);
+            let data_written = zip_io.read_cai(&mut output_stream2)?;
+            assert_eq!(data_written, random_bytes);
 
-        let mut bytes = Vec::new();
-        stream.rewind()?;
-        stream.read_to_end(&mut bytes)?;
-        assert_eq!(SAMPLE1, bytes);
+            let mut bytes = Vec::new();
+            stream.rewind()?;
+            stream.read_to_end(&mut bytes)?;
+            assert_eq!(sample, bytes);
+        }
 
         Ok(())
     }
diff --git a/sdk/tests/fixtures/sample1.docx b/sdk/tests/fixtures/sample1.docx
new file mode 100644
index 0000000000000000000000000000000000000000..919cb20c00d0c6de08b5274e0133c6d099673c4e
GIT binary patch
literal 12980
zcmeHuWpo_Ll66b6#bhxvGm~Y>VrFJ$X136>n9*Wpw3uy?#mvmi&|+=R?3<k#zd7G`
z&i>t3=X7ROW!=cm>WIk98`1Jo5O2@`kN{`^06+{_%b&E?1Ootw-U0xq0BCS6Av;@V
z6I*9JWe<B3CmjZN8*8H6H{ev+0C3Rr|2h5-zk#}hA=@8J$RfbR2c+0W6~q19QX24Z
zz9f3Z11KynaMkCy{<fD6RB#0quvl1Y5^`4HDywQ=;KbrrYnXTSZsbRpACmmzbuBn)
z7v{F<eT}gxHWHXt1qL|$tl8Syv86e{m`Llp<3^NV<<j%g1JTv*08uoF(W+#gk*owk
zPZ+H0!1h&$<W+KeYPotSTtwY$6u9`>Id&ggaS|-!1*a)>vqb42{W`SJL+^v>tOW05
z`54EgKqvh3C>$*Q6nV8MsAFeBXEp`epM4~SZU!acRo=?l9?jMHbh1l$>(?p;<aG~=
z^uml_v7^ghb1T$yRx)3ypb8_w+IU5_mhh3NW7ly2@AqVb(53mSL@$QgQ1f$_LsM{?
zctx#NU}DW7ay)LwcIOw<!}3QRuE8www)p#2tE?+VE!F|d2UET}Y2veHV^B9X*&p$L
z?k+vr+(7^UudfgQ`M;SYK`c(=8A#7$K^6xGGD$s06Kf|%hTqEnnBxCo`}@n#%j3RV
zb~C|$JPUjdoM=~A{En3;%V;#dgtY(#ttBCix-@S&_wvj;KM$^RU?4U$ITb(d>69+&
zvYw!Gj+3N<5Y`Sed#~HA_1NYPNc`MZ!0bM2zX_kVdu#eUNGd@+7#OaB9yW#p8G9F!
zGSY>*LnC~<Qv_#0LOnKZMD{H&(?MkpnDU7=JC%86)>2%{3##;kNJlV!%P96ISX2I*
zWLEl^=pU=KIz7sV@l7m|mee<NIMz&z<ms`7<*+#CZ4-x{d7VYQp`Ynsa$rIkAH7Yr
zQ8Zun!gjIwInIbR>hw13aMN`Ju0RvhzuU*G@i&EQ(92*3Sq~ln8qCek(U|eSjl|f_
z$i)UUd;K<T{b4X*pt%bq=l}Lm9ybJ17?AJ&{1TYrL;v1KUU1u5l6YzM86ZNyUrYxx
za=p=6B`$F^uHSO*SefE9=Et!#_C3rkV%n=H`==xFengem9F2PkLDi`MvXz?E5k3u8
ztiG16u65?PrL5HSjlXa%m*P0{m-+-_9y~S^zHBMR$mqabwmwxz8(o)h$HYB17Flfh
zOs3vSRb;mt_7F?>JHiILudC~UoOTdAd`iUp_OB7c2V9cs_*%wq<C&y{WA;Q;rNYJu
z!;M&q<mpPg@_I|U3^L0)b!s!G7+x9(`_8IUoSHNyb1tH2+Z$1%v-YWo<@h}BK}(r`
z*BW&6C+iK6N5z5z0N#O=;dib1tG?tX+1M=+A^Ua8E&5hJwf8cHW@yUL&|8#!N>Q#@
zLLy3;FYY5EvU|LF|Ab=Zq{cmp36BR{zUM+bB}8nqcU88a7;HXT^Q;t-B2iQ|bpp<f
z6{V#fnFkJ`VdmyL0mbVdtM49mK4s+xSaZ`yLWC$%dP(fs+D1qtPm&ek9chh*plDhT
z)4#(D9SMFHb}Lvwb?#ktC_xTAgL6#p(A>RfW53N5kwnu5WdfH@6lY|zpJQ}~DO9+R
zIP#FK{|5Sn>HM+y+)z2FT<ycSF&cE+VQk@+IN_5vTNK*0#08h)JmpAVtmi?`BMbu>
zy+G5uFe3<|MB=EKN7Yq;x?%Thx&AJ+G@~VRO5zzQFL(v2|CfW_AS{R2NfK$Tu#{FU
z6^h805s9^y%;|@by?lXi1Jm!z__<I@=2M5I#5H9zYBB~Txxr}kL{YiZ-0?++UHy|E
zQyODd%)sU^a&;P$U%K*cJ0}}T8*?BK9Xc1@>rQ9-%3imFwOYKRj3j1wviWusBn0Ue
zEcSVquIZ&~a|%3wfQZC--DHBr6agak)L9D=*S%a*ww*SqyQS*^!fL&9G#$*@a`K5p
za=Z^+L&w@a>V|rPK2~>k`a0<S<u|N><Cehl@*Y)JHdp+U0@_foN3LeS8NA{vp~@<!
zUHX0$8gj}3DOK3db8p?>^mqHiyOrWs)02fgE9-a`@1E$sZ|G}+b62Sl@vPH>sg)q9
z`N0^IFtvW6)pohgY-+Y4%i_x6nLZjIfz@pd&e(L_zkJiyZ-OkkG96}n-oT`KSr@To
zqO76Nzzio;-z?T(rlnFp8OBDwtK#EVp*^(vDG-__gzydO*w+B#n<CczJkRfL)`^jR
zf_rj@<Y6Yw2f<bb5$h{W*=ej6vJR3PgCT*IP49lBBYdkU36A6@fkpVHX()KaKEu`~
zmz9X%rdS=<!6MW(#8jGJ{M=ir)sN>Qx7bSv()NG12Q$B?x=}~~V1x(&K>x!YoXt&a
zOc;MJnSb-4#~LHyxSVL6m=F9R&F=}KJCof5`M=4y#*-iAZ&NK@M$CSlAR1_8CYFy=
zV*D(q=}CP(D@2XJHa;%(n({e_<C&Dk!g}oItn*b(PR=pTEa?F-^ONV<!GVU;&0=7T
zA8HR*p~4Wgjsw15E_25EPdje6FM)#HVA@^g;4qQ$LmEV<+Dp&Z@n!a6Z>WSn4vKk0
zHKczGaW7KeHd;WqN5{z%kCLH=%ZZWM#iWGKlx!!a_N>&R4MrA24AG~WS<RHa!1B*g
zJUFgQ0ERwc!c_9~1>sZA{EQi&A1Kf7xC?NDfuM}3!{Z(c%0!%i*Z_bx$3W2c&-ltZ
z04*Kpg@vQ6MQyWRlN_b|2ZpB{hY2g*u5`>5C(J&u`-RwOcjoX}`QQNhjhZKeq%gEf
zXP1K;drUtL<cYX_)1^tfi+H(gcrmTmQh{%SB#%x!VMqM>tUJ?XC4Z^D@}p6wYKjim
z9YK=7O6yLtQwR6}sT&9$khj3mqH4!xDRqs*QeZp=Yd33Y+uj&dRBTAmf^sju-Kz;5
zO^qw==gNS_1#b8*j~Y)(`#nZNAi*xPKO9sKQ?9mat8NJyMAfF}QFH^>vH%llhI8b-
zYVsz;UCM@fT{^B}S-gLn_r>P%#nrd|&WsNpj<<K1)zQx%)wgf;x6O8RU+-?#)_lQU
z9&WzO$#vFjd0@OA?lI~;zw~#OU_w^dWwm?09Pha4J>O3$!L^O7Q^65#B0Jc_Wdyuu
zOMt7B_7gomAqxiU(Fk?jcES=MHxdAf=x5u}6ZvCkL#4X5^&7WTgH?@1Bm58pOM|lp
zTZI>daQ{esAaANOCBv>Z3w!mmQEqfsV;LV>&f3Y1vu!!6pXIbh#)R`D<cJW74x$el
zG1xR#2C?zG*6138IS5|UgsBn$ka_>3HTwW^bcDI&=UaQK=i2Ib2`?PhPWDL7R5c}M
zIjE@AKu9u<#1JggHpD&fVAVb*GL3CMH2=u$iGWCiKqul4C!~S`z5!f~1Iwmxfu}N5
zS4lG`P{!}Ric_2q2QPm~Fpd4da|Fp0QIYO|W?YkXkqWN>jaX70m%WRqU<29)?X)}+
zhz7Tgtu48k;M78e;H0uHC4HP;j)r6T{IjraM3A(%0lPs|%<L35r#%|kzsNoSx#=+#
zLC&aqm-+zLf0)MWr)e7Qc!bJK6moKX9E~kqRPQqM0_pY?9v@VAWY0H01!#@TT`g8t
z;|Ed-&OCbE0e91U#o_6~`uL_1AtRq=O(Y2OphQb2uQrC1jZV%5BgNf2eB*Jgp`Sf3
zeGS-xhSD*R!^no!c2bRsJ@&pdxef6cX<9plyM;0<^2=17f_NE9V9%V1(IT6MmIUw5
zeM&Rhf@(Fkwl_Fv(-=%ppDr*tK5#`N#_~$hjSsc#)>huH&9Qqt-q(I@E;nT;!*2Zv
zRcty5E~*Dn!QfSw#01<m3xnZCfeW~8X7FiRevJ1&o<^Xr>o5<4xNk}y?Yf6QH0vt9
z!wX-h@~cTN%r`r~SF9fskZz{Sa&A;<#38ym_mfQmx|fblx}_-s!s^y4Dj!WsDB@N%
zKeR*Zl+DrW=`~bBQ!O+S$EFUGDKx|sSMiJy=cu_7(|yIjKq%l#{`{ipDMBC%D7Ct<
zBsU=ImI=NVzNe1W^8o(%kYOh6w2*1yA=7(f$ZHexGbl}h1U1;7mzSfJaei)Ox&ZQn
zNtg}JP2ZREdPx%8W`AK*xb(pfYL$}gMp%vMB~J@vgx%_gTQo`nn_~24bV<RVl7~@P
z89Za4V5E`AkC19U!Xx7is*kn!3sX17si|2#i4U-^M2|T>&>k7HW#rcUJdj9pq*E}&
z_mQi%9B(G`L6esK9x}XuPlcdTk(>ZVnG#;lPMKm7@V<Uiv_;caWD7yQMT9sMqKQ|b
zc7WIaXpcQ(n1^j@L?!iP$56DxandWRWe={zUwU$w`{Q(p#TX&sY0Ox)<W<La8i|#(
z=E~+l7muQzM?z?9%L8Lb_TG6x`V;mN9eshAR9pP79%elvS&(T8Wr!gvIwe|KjEx8T
z>Y|_IRLh)5nhI%r>zU?t+LNSN{7-dElA*wdv2D4LRxDd5Z%S7JmWprT5*>T#*-B3w
zb95?_q@FDYPF&i=CaFlErDlvz{hw;&FoxWKExt$3K20?BRS_2I`7MYHmIlu+9osvd
zolxqUy9Oh;Y(pcm72|Y`P@g5Mrl{Cj>t%D)j%K|AbsHDW5ec~0g#&3%92*snIAQA*
z8?}n3wM9B7=%#5i=PIX{B0m*8n_E~lU(Nsr+rG}7^8e!m2nSsJu?N~h$o_Rf=w#yT
zY+-BW^xNcDrz&H&#D(0>SpE8a+yh^umhQb^28rvTOqjctGQ+L3M1n{VA-KeX1LotJ
zn|NqaD-3L_2m=he-R_9}9oKa$a4Z7RRh@?9ZW*ppbQN%=jTU6Lw>mds$#n%ONvcak
z!<Fd;Ro({V?0mho&7vx2Zz=(kw@<A0K2_123q31q`vG4<t6+qvPfaF-!?!bQCeFux
z!-QEb`qelH#pk?xg6ftBhZ%@75FcguQ=H*5N&yFPD3w&))@fFF%-*(*(5dh$({TOB
zzLazh<aZSj%bof8v05;7?zoj`jvAxs`;)*4?4=br4Ls6O>ajP|irebcTtuQe{z1Ud
zJWS#KWwn^rrQCvmbA4mAqC`0cQfNKo9_~*a5k|8PcgP-tLSaMho1{nwxVSz1YQr%8
zG|mb-Jt!?54-^;{%ET_pu?9WK=<M8+`08t<B4Y|Na3A8NNfuTIWLfCs2pY7-6BMYR
z@>%_+2<H!n^~`hc&64jH7G((Aa8ur`1?dO+C>o`9i}-_~MtxWV5XmVN;#g#hD@Rh-
zqf!=O3H15YCWd@3x5vO$T1R15wv$h?R0y0x{N7i%QT+J|RwBWQZVG3os{u`0N+3gz
z_yxri#!2i){XWSt+M$}o`w5}|Qb?_5TgeI$e;JcVvc0TOnCgYV0gJ&`N)#r4a_aNG
zI<6V0Fg1B^$&L*zU2p3IiY{}r8Aj#vJa8t?RDORdjxRV>Op^$82a$0L@#+eSRiO!j
zeCObG7p4Q^8ip`1(B?J>m&A(4Hd$xPH0v8)d2NwDd;?jR2QDCu@9U5?;aq)-Y4>lc
zGV*6yeQEUEJK%OO^n%94<|Zt;GZ?lS6a#O?-7(*W?^yl(V!{<wR7LzD&3AYZ<?7le
z$<`$Qy}nHkN{uuMn|fOR1SdhNw<hxGIehn<A4TGe#)x=`S|6E(4`9FU1C{_uXu>mY
zO?6f+qK->ft^}7Vw8OE{VhpUw)lW)cg|-vz!Y;HDdnm#Rm=aB&$c7hG;tLqUx`%oF
zadf>GC6<ahK0FF{zgsGM4k=eIXWZ?svY;fVQ6hzC6LncBrCx$`ijK1>fFK1R=)HXk
zt|eUHIIbQyGzecnN=VD_cWd0Y$@!LX7nxzRyhLIa?zz;nK+#U`+Au#)lR}6Re2)sf
zL?79XmJs%V;neHt^FZW%dRJ%>RMO4VV`(pyg8h^w*0rYimuG_-B1za-{**?lL)~Bt
zHZ7TM6*#$Lh@2CXpxIB~cfhH&om1d4>T+JoE6t`BSTf=BmY4`eF%+2E@JIOItMYd4
zJXu<FR(hv=TfNplL9abQ{nN7elKES$1&A)#0c{oEfyf2W4pH9G&fba9$j;H^Uu^h)
z7zL0+O^;KM3uHq3ye9L6lz%3#Z;As>r|^w3IfX0lqN2e~>+NFUN$`^w9DGa@<=W}V
z{pp6|qVtVT;wntthN3YQtl+kPcAP`a*!Yp_$r|yPItdnafge1({y0r%hh&Ll@$`9l
z0k{n}))+TTynJF#U_H(E`Edy4o;Oj*vda=W?~XLv`@_Zqj=}m3ntIUHe}s@Xv0u;Z
zhm0mgL?s;-4~_aMz>_Nzm48=2-UzMFrgd`2R{9i+-WVAWzE2c&-MB%jKIbCO$TNLH
zxB(oQk^+|TVcMvo$!D^BRkxM;sl=OD$<jZ@nBBlnfUBm2&?A{K^aC{=BZDu!-U4%c
zT9I{&!Yrd33#+ub_7u@;ZNWcJ%J8^P=P(MQ)vi$PDN$3iJGAkPIGV@MddjKkS+k25
zQ*u&_i^?wiOaBSZjsw=(-9`mr?~-|HP&gSp|Kf=0RlI?V?)q9(pa9|Mlfmvk&styN
zoyE>T`sohZx}p5BZ8HVMY?KWQt$$b0?f9jrG$!QWGuj*cm@`f+lv<5C<CzoN{MGB+
zMLs_x>FPs>OD^IzA0MKH^4!v##<wtiukL&!PbvLHd1wRv*z1}qRA~kYOd>8LGnIn;
zYaDL034Y|I6oRy>IsRatn!YdB^U0B_7$|C0*$H)m74<r)g&(yjV1}Lh6Qnp6#SK>`
zj};|`t9;zV>W@Qqv>2N3C-Jt(ahP{8!fbJ57s3m@1`-f%N(_Fys}D$Wv(LS09QU+2
z95o*VEDtLRXOd}WFfVMvl5w-+I2)PSrqty5IqqO1#-g418R8Iig40pImo+LE6+NV3
z<ZjFC)5zp*>sWu%V#Nd$+pNguZd2+h=}FO9z|<uRb5xXUM&RZH#V0S9WG#dme#9=T
z$t{180-xt!d3r$?bcJsr1gq%RBR-H4$uE1F)Ibn84Vmm>N#;gYAm;qe+UM^7On0H1
zJ#Cjy&)mgJV%wu&8?tT4RWk16m$Khfa3aBv-0d^Q{MLI9#cO^OwmC1tp`%4iVStxg
z`$}P843;4AUhFfZthy;unu5n(5Z@HnYa)`JHj8-2H@^EEas^*|vVJMv{?ASY_7~o9
zq!ji=T$hIk9xFi-xukj-j-51_Rd^nEt)#I|t)v^qWQ=$`0>Q5rK@ni(zW*3IID@5@
zl?AECCr~TeAL`*|VyN_&bxrz|o)svL12_kM?+_YjEgVFnRyBa{*X`3^f(kj92_|oj
z3okpJI$JDpx-$gEy#luwh_7}=7q(Hsv9Fu45R_v5=G3t|2YMfuESXSYM0VgzCWIJn
zKN>bSwWo4wKpd%s#L6lD2rC*s&{k%iO-;zBwGRtc58N9;2~>(LG9b->C0z2%o|vKe
zT3<tPy2B+?wAmG8g+?cpw*huD>mRMt2CuInESFevP|QAOju4!SSnMBi<Z#>XG<b)~
z+S2NmJ^=<Z#I<dRi0y**C3@0!`<RjG04jpfuOwK9-^i7TOf2(#;r^bqTom<a`YX8;
zr2M<^(qtCC)iP@%73IX-L+zkX5;C&OtMzW%&bM_JSksHOJyn_K-Q@Vo4cfj-E<*Dz
zM>a5q-wDBw%+v1!(1RcS5CmdQYHKmllJ~?ifAV*lULgF_^s&Ar7u10S0Q3|90HEr>
zSw<&k4{MX(mz2jkOO8vTC_ZbY&roPtu7r4@BqD>QRMb)pvz7Ju<O!T(Li$uB;9v{@
zO~sB+d%<uESjZpDC&H|TR4`zw$Fya$`kc3Jx5lE&em)LP(MfNHe?R}S<==0_5A);h
z)bM=g_F-nBYse7;icZq-6SMo~!unp;%S$66;pn#~>djP2z~-^{jvjoHM+YB2&)^fe
zUd{0r;}N48r`UYjr0}YPm5HzdVA>5Wzea@y!Du(ta8{ITR5T?7M7*nj1V;Gyo*k<1
zw@ru2<7h<!m>#mxqzxY0ae|iY4`deAvZz^RsaV%ElkT(_edNRT@P4W}Sri(zKYft{
znLjAX|L}G%=c~wtT}Okm2M18_`Suv3V@Hr0X!OHC;Em?@zy#t%X_;99G-9Bn&;})#
ztHdUgcB}_#P4Hv4cw97pe5lyQt9ZIXa&w5Q^&pFRbCgLms?>Gq%lzqH%x62=vK8;g
zI%UphqKmc5w@bnv6rF{fCHIycCH!*|>1M0l+(S=HZRq%D73G2x>yMlc7>GRtqqR>m
z7`1&FQujFy(OEn7BgqEHh*8GjK;FpS(<-_atn_`Eg{+-mhngKpS4%6cp3PQ6+-N<Y
z)S35cOKg}jI`HZbmbaqVi2ZvC?){fCG~j_xg~-lN;(@fxpI%bz)X99kjVVN5D`e(c
zo(g^ISsMlS1aDaJy?b;!?o#f6yeT_|{k`kzw^G(mmm3ra@yc&f!oMHQnQlc?{Qw#u
zpEuohyJJH>0m-8~jd_b3UOg)Uf)>^()6~5QWGKVVsNhyVJb2R!Ma|=~azADj2$1=<
z!<6+U-uuE;=cKMvJd&HSoO*WaK+pJ~^@P^_w9K>VTRx7^d4`=C-<L;-M4A~t$iexF
zSDFK=c-|`u!R~V7!ft&Ga=-DKhyS?p#Ylk^r=Ns_ElNbn94}mXoC||I+kq}Bs;}mb
z8K-}<5IUXr!bVr!={B}^X~_XdNV;qfBt-eG00R|J-ki(N7b^&EzFcnID}?PUpj%eF
zmAaN5gh9ibZH<E%5^?<!qaT`iPa^va{R`q0aCkk&x2Ok*VDz1hS)n#p&H0qpfL<Xe
zC65=p$=n&qbZsK;l=1Pa_<I*>1msqyHAKknL4MSj2k|DN+TJ`naG96$dkIl8P&l8N
zftw?ivg>J9u42KxTYl~ju)=ZK@;q!y+fvkVF<m8U!*cO2I#O5~-gj80&wG*q?#XFX
zo5Jc@7TVGVH)9BB(y!AWzh95RgHC#~`><!wu-S4`S<Hn|O~*raWgaPK+CVovSj~m6
zOvfL_mTXeQXnN+<@VigzTg#|oi;z5lMi{HTrJojY2d1Ulag%CW6DpE3%lSFGXZs^;
z_E=yt+qA-rG(&~!E7cfC%0>;^4)}Al%#}B3E<(0dHs8UN_Z%_S$Qh>6VVa*=T&6CV
zW^#t961$+BM2O(FXP;DYBmsfdl7KmbudxH!*hyW1{6jnC`SB!|D(%N>HLzu!<*s=s
z7C?A10mi(Z&LWuZv#eQObc+QayZj@vusA4!apKWSwuuha<fqWw{P^v)idm_o&opX_
z0sEhd85wxb`D$Fwz*dCmA}rj!fR?>Ke&%Tjv~@mTPqfO+vWz643PHH1ZhO>{bh9OP
z`96*Az2+aR<Ho+}mlMwO5mF{l)K??^E+9%u4)!ii-89%i+!0>I{dHIFDyRSqVNquA
zrk;<RQZT({Mw5{Tx{iEvGcbhzO&RfLAYoE)ErT$5sTNAL<^t2w@ay`gS9u`!g*c5+
zr?sPsud<Ymi44@7Z))}1Ip5Iid9*K32c2&MR|1as=jpjhzw3(74GA7mE1U6_ZeX}i
zZ=Ga4VYiV<PWDpdrgeeGSVO{ntfiLD9ibVe5-UH4hAKZV?w8tUfsM{4D>{Ek3<kr-
zhK&e=c`GUs2%fk3(LWYt{1r9>NBwok*;4<xYGy)TSjrCvD={mU;3ahQa=NSdN!nlG
zJ{3IglJ%|Vj#*wKfgW8CNy;g*QeggNGY;8`KgH<1Wkx=o4@rPG=EZwad7>F}PK(+Y
zPJgnRj*v8f;%qVLkKTshJd%4iKFXu7J-Nr20wM__3USCYv!xJGwaXo@Hd(1HW_fOy
zwj{e(CMa)t>mvqiX)kvczTOu~Bgq0?XuLkbpCvVOY9-CQ@C<LeSDD0C+ohh^Ra=Zw
z?wTfvev{*c<}Az2J2wp*C~mE}+^65#giOWQvLvrD@mQQdY2aS4)2N7Rq$|RwH%K$X
zr%#k^q+7Z^PbvULMlso3WwG>s<65g5376dJGhM<fsk3(cZjpv*Bhl-%^>}FwW?YnJ
zdVIUmW7ODR{4**bHXgs=Tjepb4ftcEFTzeNJ}R4v*Uk(o(P^28bJ5THBZ>)^#|l*0
z6}!U+A`DrFzIM@c1t6-LmO>w2bVhe1ey-DUjgT|qN7oS9*q@eBuoA8bqcsDF6Wrnp
z#N!jN1n=x8U>CP?(d*noRgrtiHE-14&J*)6LLDQU=LkaA$$OQf9V1iafICs<2%^==
z6Ti_cMh2%>ZUS_}pf7F(!Jg;@!76BjKI4L){{{Kung06rnPrJ$<hS(7(Eb=SK_B5%
zDlomE|9>!Oe-s)fHxwFS0XTHQ?9Z^o<AO7dI4yAh3!Dli-2bA##iyzC+&Udo{vNdt
z6>7(B{4ORuH(JlCr##ce@0BNL->U+>-Z8{)uUfTT*ymqH+7fN>!lJ6=Hl(XoZIw!q
z$W!w9Hd}Uf7e%dJ<w))7Lc5Hj`9-&C27DcC8reBYA{xML^zd9kDd*|8nP{|yK@8k#
z$S!l%&`uyh6zgiDAD)i*f^pSguEY98X&EEWD%d`FiNt9HS=L^Qw-P(klZER{Gs`u-
zNW#O1`lGp1KhBTosPWyTm@*PPAp>Xfk*M*+5AOxla8979_$rv(4Nv6FgHPmbm4#9b
zN?b8Z@xi3nh3#{3x|);_a?(qJK`q@~Aq6-j`X#PNqpoi#OTi-E&ma$`T0-xLvhyYw
zlmtVomlA^~9cDrq)BdGNd-;EwEJN0X+5H@fa0IzQLYIpnHCVn{hWPz1G#FY|83N0#
zlsJKP2DvoSQblAT7pK>L1g{ag40{vtC%cfgeI+)Ju@t*izjaCQdJ4TjCG4off?@_6
zXcV`{&^i`%q}g|MKh{I*g>tYPgqjs1hmyV)|7$1=TD6LxYt52A6R8U2ty=Q;Gfmbd
zp_bYW)57&$_6?+5zdins&Fl<Xmboo^U<<HYvcH`d;3U&8{3b^RB&UF)6!aZXLBa+k
z|7fiLt*7I^HU8d{@o&iA8-MR7D@a289J+WRte{y8*wzH3)7mY6?S|>>p&pCdr$ypj
z%=si7dAV<Jbfbo%)&5!Ti4W6>BFj&=Rfl?9&X3*i@C#wUCA-R``fU&E**!~|%9oJ?
z&6Ql1<tb$t+fj5RFz*{j+L<5ep$g9=W)N%yLr|m~*2#zp){I}9HEWg6DzxkGBHtOY
zCw;kMg9|hSUR>PFID49ak7;)1on&6!m`!vnQglLpaxd&Kd4SY3;7cAuLuF<9TI^Ld
zPqKH=yFOgZF(GI3aK}!g#WJ1Bp{WPe|6=ZepT2xQY90UBxC-59qS5|N*0zIg4SG*1
zG5NWLK#p>db2?h(O%oPV!K>xBHYyQ-j;>YE;akVvf!znV?dyicvK8(0y`Do}iZ>Hn
zV-iq%CB`MbjpT!7u2lwNKpK*6w_ROQA=j1m>xRrz_S%-@=H)<;J6j9&LRYeNsPd6@
zhF0TdrfsZH(<rt1pkrd~g6`yl6j>Ib*Ho51^PvFxaa<|Ed;bIZ^sC;%>dj4BCV_+H
z4Ybn=Mx<E!CqjZ8<;zP$F*(BVDnFY`FxEUaJ|>S1KK&vciH&Uk_3Qel#-lfvGE#|f
z%fXij9d^?pUY=dkhhT-U0$X5Ho#jwy@lWYL_?h<1R7Xb+1B0)=VfJzxt8(87)cdd`
zzP6k{B^izY7YDzk`ru^hb=qf!mw3y*onk!-moVw@$ZjckG0}}s=35YJ$FN9t8d{2{
z^G?m3<?A?p9yLL4C>Sf1T+(cs`m9|h<Z@f=O<+;l`vs}SxftpS(Q9RDJ=TJ%<~2-g
zg0j#zqx^(F@=kD2@ED(<Y|s4kIO|AFQ2j*7<+KadzHSBkxqe0NT8c!-M=88smIiJc
zVXES^`tbb}zR&!cv7x2uB~<J4QmOyx?d0)wur9ipE4W%7qV>k0dlS3zLg3izP=>jr
zi^-(u0nEFC!j9RS{i|oz<)Y0HV|7d@8~ErDERGv#&)`m#O<l7xxe26!q{^{{c^uRl
z6Ui{fFBL+Vt1;#KIH{3|N+hb%8fT9a$lORRvL;&wf<1w$y`4ck`?RRylFl1HMF$c>
zP?U~5b0+V0$@V3qB7;~-P=*!G8FRwTMu9-3+;9dCNnKP0L(`Yfhnw;{Y|r}PE6uZ7
zX@yu?VoKuZ1_&b9@zpt&#wxL1dp%8zdU_Y2y@GLOepS)#TWx0r7Y&3X73Nc~%%pfN
z`lN9BBGcuYoo}n6MJ-{uqMZA*jb9lT7JO4JjyAZhFf8s-PILkW@zE6NyEJDkX{4#@
z`_d9$kVVfuiybM)>)NghR65p-!NA)L%^jn=ciXg#7nXCYoEuk$P7xTrNA89b*sWAM
zA@!57+Y<#VU`u35DNXUW-1Tib03+s?8&61`U9l2$YV80?8x7;A=$%dmJjK24nI87I
zDK%F1;FI)Y^kD(B$_4+OgZRUCoK~lTQ^{i2vnz8hbW$a8p=PsUS(gD+l}l_7{8?}O
zl>)pv4}0x<w$>Gc!2XpDyn^qf@=(*ys}5a9<-^)+P%S5oImb@X3}98O+FHxF&17gt
zLm!`KD8*;aST-bf+ZTNYt+uzJHuP9BscuRv+{&Yqixjh($*Cb&YkfFHf7&fpSmL}-
zD_$*TZGA|4kXT!Pl{w1kiq_O=JloKhBJ<+C?LFvV7gZgbt&`Ee3GYy-<=Xoa7U#55
znpz?KCUHNk!eSSZjygP?+Re1?F5&t5pP9ICW@TYEAgZMo#1td{l_@r`xBm}O4B|fi
zb)+i}$%0bP+DI>u6izFYf@p#oD&=W={KX0&JsXZ?3^(SJPE=3UzF82xX^NdXJw7!z
zHB=|iipOUXJ~<}Ik_qP(qRp6W(A}=udETZNGel8^*`i~(MD6n)U}V%sP4ACk>x-bW
z*4~zpd`YUI!nGDjpPZk}cg#T#U6f~4Q_<X99PkO}s42@$VdbT$gPrL?3i23p_Y!|4
zSCMsn*wpg4?CH<xnZ=Eb;de_mE!DR4N1#ezfKfWc@DMm3?-HHHx`$WS>l%HF-kQ<j
zr$QPK6MDHd*CmR->|qB=Hg?>R(}cv-Nvuu&HecQf$)O9zQZ3Sq#FN4E;s6)DcuVsT
zp!`+=Y9md~LI_HW8ou%;PgPJT^f9hqqE&mfaDQ;v-Rf2u<4TLv#?>|@S8dZ&$$}^G
zMe@QsL@hyu*av|wKo`Vwgg%dJOFesORYqqZLr3F_%H@K$=D6GF5gh4Ux6{U-IX+<E
z^dRQ%pVRyQ_(}e#{~_65Uh3}x{+^8d2k^H#9z-$zB{lh1;NP<}{|fvDqOSj6Ih(&q
z`ZaU!Pgw_`|1kVp9^bF<UlX1Fgx`ZAzrVwOO?&!P#IK>^KSex%IQZX1{55d=EBx;<
zr9Z(Szkvw={9A<SSNN}#|DOV?vH#Wk-vR)?;(z6Y|HO~s{tN#LL;S0RUs=sRCCK6b
tG5-JIIDZBI-B11r1ps*H|1$c2c};n#w;=WSE%6-=@Ezn?R~Uc0`Y(^^S_S|B

literal 0
HcmV?d00001

diff --git a/sdk/tests/fixtures/sample1.odt b/sdk/tests/fixtures/sample1.odt
new file mode 100644
index 0000000000000000000000000000000000000000..a850fca6b114aeef7baae178d5e219a47baae3cd
GIT binary patch
literal 5651
zcmZ{o1yoeux5tMXVvvyTj-dsS5JXB+YCuLhW#}#urNIHD8|e^`?v{>0q(Q;~2I)@e
zhrbuTzwiIfT4&vR&b{Bg*13D{d(Qr7s9<2?0sgZw0gQSD@j31WzdO!dx3{*pbn|wy
zG<9;avo<$%vvzdg_i(V_b2N3ecI9()vUIR;G<UbRba3Nyv-EP)xHCoy0RAIo0swwx
z#(hEq05E|70REjZS4%fHYX>V=J}-N_2yKK~mjF=<%K>4GSfRs4u-?6S(T;mhzSMr4
z>Wu2&dbG6nUDV!LBj(Kj)8p!>?{tpFK-=OtysTE+ew#|x1eZ>gozr{gS6-!=idZp^
z2O>R-nUHw>h@<O;srpiKxDx+|7md(BII)t3sSI|+V|yAG{14s))XAt9XyF0pYK@@x
z*Jj?`KxnblH7Pc|nRN(eMrEt5u$sn{pW=Etzh}O|01MJtL#Phc&(!-tbCxdf<*$lD
zEe*>FPO_0nGx>p+oK#a)4diB4?&y1n#+JV8mE)<TSZo3JE!!pg(D2!m?=_`_Jtg#&
z-k?jy{6_fX*>#RFM<aa_LRfxMW8orC=hkTyap=_`54C@!eoPm*adhL+Q_`=}R}=QN
z*;Sz#k{vI3ig)0{H*66%MrsAIe6${b-jZSwHO3>x!GJ9{Vpv(oejMQ_yjwh^<u~(g
zm(5x`eXTspiK|Xn=GGdG*LME_Hw~M=D_ssXT!*$&%#)H1P)|U~e-op&gxDbng*i51
zMES}>j`4(NeHWA;OZRaW4X-6gp4rM|zPt0+?JWl2Up+c37|j3u?ui9D06=zkeJ-yp
z&8wvLjNjhW!TOb@tJ`naevav|=@P&X*$Zq9Jdix~peayu^a<EO3o5k(>OYFnO0CZn
zQf#IJ(jBu8EMH3Hb{!a0i7yQm#ZG@)RpQRmFwY?@M<R_vDN3P!%qz<4C-BcvC{$NA
zARqVTywN8$=C&FmexXLZ^ciAO3yzM-5K>zxUTg#M(g8tVJ_EL-y_jt&mh5=0GAxU6
zemuLm)nKA3&_CP6P?HD&Y#=K+;H&yyu7-pItVT~yKWQj6g>AIG3v!Tamb?udta`QL
zBdeKby!&G&09_<?7EyU>c6GPf|NAXoh(|BX-$nNg#($-XrJLzLkqtq7NWY70g+VDs
zWQRbrcLdfpZ@omGDk)f`Zx?OPFLIlG`^q<>!y+*ur&U@<*S&F$k0IlN?;K+aoXf3c
zA*S>c#FF5^b%;6!Un|}ZHBk9Mb&squfQ9g6P9$F}GXcOJN$E3^eCj80@M^X)xVRrC
zEHn@9d$`x}fOH5yD>rE2MfJ#Q=~pGw#cQ+rrKDum#MsX^1ZMe!>aw*o)=ktlU@>Iz
zpT#o8^ZE79A3p!?MuFLL;FagkHt>2ljd&qOJmc-IQ4*6Z1~rd@`PMgOC~AnRJ%o64
z*)WlV=GQp%<=Dr(_<-zAg3YnQ$jlp+0FoE?*6XV^z5$FEdy!F$PE7TgCgn9UJ<Zxs
zoW>!xQDuXvRUeNJ`#Fa|eKjhsH_6&AK^M^syW>}~dbDaiK8Xx3Gj8>EGAV{vZ#CsS
zHvC5jKh~>v0-?4`1~L4-r!9!QjvwWYMzmd9i~M<zHfkKhX}}nRo=8yxoWQ^rirQYx
z*Tnp}P9dWHYYu$dMq)S5{N7XK#1TC~mMvCb%U>iKPsrvtDok$Zu=g-~fh4XSo%m>v
zmoh9-pO46nbyv4D6)ZCfy*yVhm2twLXxmXK$^;tn?{44hIU*Pq+y8Ag6k|v;9^J)8
zJuUzMx_iUi(ZTKapYV4MyqI&C6TtUnsCY+IW7X$8Mo>yzNGY&w%?r=I1bk1V%dTTp
zbV<Lu<{YSZ+b9lY^1}!ZT_N`7v^{gOp>fMo7U~d6|2##hX%rTFAl}t({8rAB(puI_
ziMqo&<eN4s^m<MGv8K-FENd{lEPq}ucoBF*+>H`vORuHcB~`~7RV_@f(Jm}d49<IV
ztYpRWAX>_O<J!!baWLqa(;~>RgBwVL!vdH<Ydz*a(m=*i#*#eeZwbd|h={SKMW_jL
zXKyy}n9XYFgXuU*SbJr&m1}teUC~aNqnLdg_&2f(RnC-y#53JUbkOzYm0{XH47Tg;
zb2UtoH|J@TRM{1no%ptI)|VNioxG-zoy=+)jTyZ`K+6-8yM#kmZf^MAV&?it+@$YM
zM330l`59KB4AUTFoA!-1u5COYGGj-Gx>R|4pK=vT3pq&0<ns6?&W#3}33?SsTy(Zu
zj+9iyjQRO0@RvR7aQ;EA%@Ei}owcXo7h}K31Db<l@Nb!{gB#%F&Bv18aYv>x@!`ns
z9}+faZUQ@#8-Pk=->|qeO|Nlz@QH?P4=j3mxdyp>?G>%-^j3ik4AUOV8NK{CeOQBU
zj8<ar6K(Y??V(Ip=n}LL190J+F-_-Y`rc-+$jpgvGsAjIjD=MQE8;kY^VQsPu5xE^
zfoAN|J>~l~&qQEC*A7lsj<5Og+a9gIKV46P;kL^hJ9evnV)hUcvJ~!4CT7t3kyQ>0
zckn;qt@x2cNYxyWv!{v(NiW~`wasj7vAjg$(Gt3(kP6_LLpeVBcT&EPD@%zpo1Y8T
z%ZRI7iskgtb&Bp+*}w$`JY>lVW{EFWj7=pt?Xzl##dmRz91xIWs%1U|Y@Q@qv&G^$
zP!0;LzhfIS&^)C6Nm@XlZJVpN(fwmUa%pGb8b8D!e$85S`zedaB;7hBe9)_kj|9I?
zlMYLdfJt)XCzS_A{tQj=SH9CQu~y$q<eTA`Suay+{+E|h|28)u*u5-*cboo>G63-V
zc;V{iZD;w<&c9-?6@{uJzBS6a4b1BDow(RJp<V+86i9>S^QY&r&Y>gs?YX(b1LVp*
zFQ+|4S2W^z<_^sxciFHpp#%(FDM$%L=c}dAHTPx7gO)IP@gpBkLnO+YGe^{L_zKx}
zO{@B*JensG%B=O|$k=P?cK@LK-8mz9xai8bVo;i}x}`>cQV<2bY7#jlv3Dz_ePF!E
zMBYFZQn%Wp9Ri6th!0Ukw5fI;LJSN=LbwwVbwVa<gRmGrv&D8g45m|$K82$?a~AgR
z>0~W};_2ty;A-o_IQzpDwH1UKsfSl*LKa+YuWb4tGAri|QLPc^;>Fa!_quCn9ach9
z7e&s_;u<q9IWta=IEMV!xIqUBX2(*S$!SZ|6a`JQ;MxU;y*dn1-S#;;x2HCY3ya{@
zIZxJd;kc3lwARp2r8Oa<gLj!y-){`6>gEh&jgy+)sxyvSc3?f+vwn|*AH~)|DC1m)
z7!lrR$<jx#$Kj8Af9B&HiQ3vjnmjG9#DRvjLPh8oyB(mxPbU(CMcT<;Huh6J<1U0C
zlPcVXDi8gRVLni-RDEL3=GF(_7vvfb(I|<KdoV%MGjF!($SE|Fck7Ik9?zGLE3New
zbToaxeL*;%V%)Nv9|Qv!+Dw=Tnd#Yk`Ut5JbyNX<TEQ+WLbrXYaP;NQaTgnLzc%AR
zkL^v305)}R`aolzI_K<XiIP>&5R)-=o~Q)9<Ihwo@*ROchW30~Gp0{MjF2}p7azN}
zeZ!e%z1sIn&q|=xHU6Uu^lY-Af=7_5^fa+SdWDy}KIV>#k$zjmPLlf{Tq4j<-onFq
zO!@C!O>MC8yE~M#FMjY3mj|GA)#T#w-kl#z(z-3s@{CAVh7>^PdDi(TW9dX$_Gkp<
zaShS9Fe1sBn)MA@;$)<>`RwK@bCehTXZh|9BAFOg9&W}6V!t|Xo+b5dfulHyXjkgN
zST9`M$W*P4{9KCi3ME<{>NGEGkyyJaYkN@B=)!v=bBr%!DG}4EjZcLIW_n}r^aYXo
zh?Iezx?bFx?W#(kKUXKb4?*_cJxR76bXa(N(C<yISh?bokIp!!=q!e0QjeuZ6%YKR
zP0Xm?cTNd^P7Pkdg&MB%@Cqi%(z3Sl$0ga3g-I05Q2IgQocjT6{hvpfYk|OfS{7}y
z$VXOGn~FbMHltKLus>I_4OLd9RrA)iW<%>M4Y0tyL|-@Qy;(AyS?h@SF8XWxtI5Iw
zZqnD%C4sI4Ojrh086Z^Tus*ue1+BP#zKwb6Ie6tj)VP=V<7#e?HH?%SsG`NOP{5Ao
zB)pHjKi@DZTc;%3*s;y3VHSkbtyN)Rn+4B&S_#C%_eY2>qg!H8`+tpo5>9%|Bo7!?
zx3j1iLWNOI^E`!@70mi66{VO2r?i_WWsBtKjsgj-66O!5*BOo@++k3|MTY^{>xEsP
zcpYwNz<7XpbbkQn3kCMtrQHRb`IJcA{vy=nL;iKMl9*bz{bET!SyGHoTJxovI%XOB
zYbos{QviL@^C?k((lNrs=oz~Er4{XghCxju^9|U+$sdbiTufd)cTA=H85B|;ML}NO
zFSv>a{#CP?DvC47p}Q!ZSn{;ZA`3$j8cux)=+zbhF+J}ra?R*|qe+)bD^8p7g>R7*
z^JVOb%Hr3A1a<@EFfH=rdj@2SsV0*5I9k)GPWq2Xmp<=IED4fe$%*{TZrYOs$*p9C
z-Il!CvLfH8OX*%vt*M%_FlnQ!NT3Q-S#2bDtcD^8p2_6b;5c)>Z3B)^>sP#f{>9O5
z=DtS{(*=exQxG6UR_%OFms-^q7G7MKn6#l96vE_z;wdMr$}5+;kNZ97R4k`kurc6M
z7lhp(@hF?tTZq<X>jv9u5OoTp+b7{#0&!`7i*<JpIkK0?b{41J<fah%l>|R8`sL!M
zN)`10Q?VsOJ7Ds$WB{A~Df+nt=jRNec+$89e{=1digMZ<YLnD88D%sPoQ39Hm|IA-
zTh(f+{zEm_KCsOYqv&X(9$@=ej@In{VcF%;{#*Q1e|B42qW8_%sud|P-EoWLZ~h4%
ztS#H%pS3cYDAc3r*CmBf1F?>gg*UwW6M?sFEOIBk*hG`6HDO~bwv0ATl$wceY~_kT
zpnL9HBvC(S@)>Nlvdh9a;fjwl<Q<o}o_rFK2uglt_MVc}7~?@#S6JnWKFcdFP<L|w
zB<<wI*%=2dT3H|^U5>UykWf<4xTE4G7+f=E)Fh940!U9D8on1w5n4`sFh$$Ob3Qdf
zbq&rwqNb*2n5tky>UE*gV85E(0J+#Fghxx^bYHGby`YDsOU}0ooq-Wose0X^=$kZ}
z)im`h`nJw4zPxp<-*Sn3US8;iZ0q8!Ui+NviPz!tEseaKxLaaCX#EFt!S5xUC9n0h
zg=e_zb=BR_yHo_%_%3rG-Rc*YOB5H0he8H5k5~qdkL`57?Wk;Xtq$6H%2}YE&m$K<
zY95KNmJM%tJ7f-Cz3L5_Yv|qCCNbFIft8&jPg|!3bfIO@C0?xZ$F~u1Slht^k+iRJ
zK6bK)MQ}=5-R{LHc!L@x8}{s*IG+RF!J+_qdWz$Hd%TOuBeL8M2-_2JzJt-Zc&~f2
zg?O*t7zm*1!O6OzIbu&gavfZv?zg{7yU)Ry0Ztxp?oj<vU}8iEUo~;!B4zlXa<ycy
zoSx-XY|_?qADO+pwqnDl$29u#%UgI(pj?-K*6UBRXWkiVI3CZ=P>E02AGQWnski*-
z<v6+UNdY{$_Ut3vWGVY$el$)gYHUN(*~fo9;9qX^RW6Ov_E3IJ2eb1nLi(_@boCK-
z-!0)z?Wv2gh-)J<V{3vY?cr0WK8vr@Rx??TLN0_=cIp<JtXS5bG?ZBhj2U^tjPyi9
zybd~)e1#=%NzkpG+Ih92^wUst)2g;CmuJruy+B3TFC&x5#&*7fO8Z)F2$pI^L+9GY
zHizPBWC@>qNMTVu-Aq}iD7i88l8S=QII=0ebth@euF(rTBaxW8kvSXw%J{-?5M;IK
z3QQd+GEQDhk)e8{uLc=I9p%h>jdRsCzh-(}I(PXlz%(E0w)yQ0Xiqv#!hd6B(kF)U
zVDLs+ceDP>_o8}@SGQ!L(31q#Y-Ni3Q9A5O6x3g`>AZgWOo1_2t%}PcTqlYeb;DRc
zNSrN1Z4^|(kcw$9)^zv_PA~h`nj{9)Op3L#XfZQEXTZ4kO5$P5@$b><C-ll@1iE>u
z>PN6vt;xWrQbuKve!0F7@3=K$7SG~^*_<L8F{GQZB&Jx>>e`x38P;}`92xg7KIXBM
zjAh-nMU&;Fj3M|+&3L3ludS#y7i(%bE3-n-;yC7@_gTyiv4JO}h&|{v-Y2zyVx>=q
z-FBf4PPE6uv?@&)?mXZwdSGT0!^CMJn6&^)d;1eIaApNswE9wpi2bwQ{l;J-E%%^V
zO<|x)e*j0K-Qo#ro}qZMv<_YT?bAq-2|8&yzj4U)oV1}r7VLP7ya4po5Ytjp9T?09
zH%9bp5-ceQshJ71MMyVeo5kYEYml-M@nGFvpWR-mBteQ;H%gj>Fxo>N=So8jb5QZ*
zX}luYDYT<{jkGlOCDaBzHT3qz6`8`bZLgl#*7%ORo`PLM4?mBd@-Dp0-JvO~el}Mx
zbu#)9&miL5rX*LH%~G&HB0i^Je1*O0bvi@|uOA|19H&sA`bK>tQHZYeX*en#Sh8YJ
z2`9_YffygvBa|@PzX=+4<7&S!Jzeps3%0>Qo;*YCVrE5<W-IHnO|)#-NZADj=pW#`
z?H|f|>mn_psoB_+M5TJXW35z^;X1=WKsuu`cB4~a@r#gHc3?@oH@8YiRp-XvJmdT*
zQbI#ZU5ds}FfbEapq)Hx(6+<9H~SRh)DwfYC{E~U{ozl@G<<3U1*dS8dKwHa%UrZp
zq}#1*FJ0_eX1oXIiDEMK80^iiQCktTde9;q;LonmHB|iqk~iL{$C963`zj7425R=J
zpY(m+GZ4fMr<U#zVxg#h?yg(5iL1&zQ4$LP2A)mW)FO3mHMyr8a#)o`mqZB2uPecM
z7tT2!8B}43t=E!Rq4@Sb?)Esgzx-)+I7`TDaRef53h>aMysM0OT#{^~fJI2QpT<$}
z3eUV|`SyP*3p8}zf2xSP3Bdn_l)q>HwEsqy8Y;h4|38iVqj-LI`@h-b-^#zK*B>Rr
zUG@KO8Q9+@{)R<=OmN@fqdUg>4?6l=^*3PnqvFN-7u@i-@bBvDk5K83>i((4G*qze
U*eu|V*a881cPuNH^!MmL0Hmf#h5!Hn

literal 0
HcmV?d00001


From db83807e8f7242ed6dd6c74ca0086fa53dbb4d4c Mon Sep 17 00:00:00 2001
From: ok-nick <nick.libraries@gmail.com>
Date: Thu, 8 Aug 2024 12:13:53 -0400
Subject: [PATCH 18/21] Hash central directory and add unit tests

---
 sdk/Cargo.toml                        |   4 +-
 sdk/src/assertions/collection_hash.rs | 213 ++++++++++++++------------
 2 files changed, 120 insertions(+), 97 deletions(-)

diff --git a/sdk/Cargo.toml b/sdk/Cargo.toml
index 27959873c..240d4119f 100644
--- a/sdk/Cargo.toml
+++ b/sdk/Cargo.toml
@@ -122,11 +122,11 @@ sha2 = "0.10.2"
 tempfile = "3.10.1"
 thiserror = "1.0.61"
 treeline = "0.1.0"
-url = "2.2.2, <2.5.1"                                               # Can't use 2.5.1 or newer until new license is reviewed.
+url = "2.2.2, <2.5.1" # Can't use 2.5.1 or newer until new license is reviewed.
 uuid = { version = "1.7.0", features = ["serde", "v4", "js"] }
 x509-parser = "0.15.1"
 x509-certificate = "0.21.0"
-zip = { version = "0.6.6", default-features = false }
+zip = { git = "https://github.com/ok-nick/zip2.git", default-features = false }
 
 [target.'cfg(not(target_arch = "wasm32"))'.dependencies]
 ureq = "2.4.0"
diff --git a/sdk/src/assertions/collection_hash.rs b/sdk/src/assertions/collection_hash.rs
index 595332438..571d755fc 100644
--- a/sdk/src/assertions/collection_hash.rs
+++ b/sdk/src/assertions/collection_hash.rs
@@ -1,7 +1,7 @@
 use std::{
     collections::HashMap,
     fs::{self, File},
-    io::{Read, Seek},
+    io::{Read, Seek, SeekFrom},
     path::{Component, Path, PathBuf},
 };
 
@@ -208,12 +208,13 @@ impl CollectionHash {
         let zip_central_directory_hash = hash_stream_by_alg(
             &alg,
             stream,
-            Some(vec![zip_central_directory_inclusions]),
+            Some(vec![zip_central_directory_inclusions.clone()]),
             false,
         )?;
         if zip_central_directory_hash.is_empty() {
             return Err(Error::BadParam("could not generate data hash".to_string()));
         }
+        self.zip_central_directory_hash_range = Some(zip_central_directory_inclusions);
         self.zip_central_directory_hash = Some(zip_central_directory_hash);
 
         self.uris = zip_uri_ranges(stream)?;
@@ -392,11 +393,18 @@ pub fn zip_central_directory_range<R>(reader: &mut R) -> Result<HashRange>
 where
     R: Read + Seek + ?Sized,
 {
-    let _reader = ZipArchive::new(reader).map_err(|_| Error::JumbfNotFound)?;
-
-    // TODO: https://github.com/zip-rs/zip2/issues/209
-
-    todo!()
+    let length = reader.seek(SeekFrom::End(0))?;
+    let reader = ZipArchive::new(reader).map_err(|_| Error::JumbfNotFound)?;
+
+    let start = reader.central_directory_start();
+    let length = length - start;
+
+    Ok(HashRange::new(
+        usize::try_from(start)
+            .map_err(|_| Error::BadParam(format!("Value {} out of usize range", start)))?,
+        usize::try_from(length)
+            .map_err(|_| Error::BadParam(format!("Value {} out of usize range", length)))?,
+    ))
 }
 
 pub fn zip_uri_ranges<R>(stream: &mut R) -> Result<HashMap<PathBuf, UriHashedDataMap>>
@@ -455,91 +463,106 @@ where
     Ok(uri_map)
 }
 
-// TODO: blocked by central_directory_inclusions
-// #[cfg(test)]
-// mod tests {
-//     use std::io::Cursor;
-
-//     use super::*;
-
-//     const ZIP_SAMPLE1: &[u8] = include_bytes!("../../tests/fixtures/sample1.zip");
-
-// #[test]
-// fn test_zip_hash() -> Result<()> {
-//     let mut stream = Cursor::new(ZIP_SAMPLE1);
-
-//     let mut collection = CollectionHash {
-//         uris: Vec::new(),
-//         alg: None,
-//         zip_central_directory_hash: None,
-//         base_path: PathBuf::new(),
-//         zip_central_directory_hash_range: None,
-//     };
-//     collection.gen_hash_from_zip_stream(&mut stream)?;
-
-//     assert_eq!(collection.zip_central_directory_hash, vec![0]);
-//     assert_eq!(
-//         collection.zip_central_directory_hash_range,
-//         Some(HashRange::new(0, 0))
-//     );
-
-//     assert_eq!(
-//         collection.uris.first(),
-//         Some(&UriHashedDataMap {
-//             uri: PathBuf::from("sample1/test1.txt"),
-//             hash: Some(vec![0]),
-//             size: Some(47),
-//             dc_format: None,
-//             data_types: None,
-//             zip_hash_range: None,
-//         })
-//     );
-//     assert_eq!(
-//         collection.uris.get(1),
-//         Some(&UriHashedDataMap {
-//             uri: PathBuf::from("sample1/test1/test1.txt"),
-//             hash: Some(vec![0]),
-//             size: Some(57),
-//             dc_format: None,
-//             data_types: None,
-//             zip_hash_range: None,
-//         })
-//     );
-//     assert_eq!(
-//         collection.uris.get(2),
-//         Some(&UriHashedDataMap {
-//             uri: PathBuf::from("sample1/test1/test2.txt"),
-//             hash: Some(vec![0]),
-//             size: Some(53),
-//             dc_format: None,
-//             data_types: None,
-//             zip_hash_range: None,
-//         })
-//     );
-//     assert_eq!(
-//         collection.uris.get(3),
-//         Some(&UriHashedDataMap {
-//             uri: PathBuf::from("sample1/test1/test3.txt"),
-//             hash: Some(vec![0]),
-//             size: Some(68),
-//             dc_format: None,
-//             data_types: None,
-//             zip_hash_range: None,
-//         })
-//     );
-//     assert_eq!(
-//         collection.uris.get(4),
-//         Some(&UriHashedDataMap {
-//             uri: PathBuf::from("sample1/test2.txt"),
-//             hash: Some(vec![0]),
-//             size: Some(56),
-//             dc_format: None,
-//             data_types: None,
-//             zip_hash_range: None,
-//         })
-//     );
-//     assert_eq!(collection.uris.len(), 5);
-
-//     Ok(())
-// }
-// }
+#[cfg(test)]
+mod tests {
+    use std::io::Cursor;
+
+    use super::*;
+
+    const ZIP_SAMPLE1: &[u8] = include_bytes!("../../tests/fixtures/sample1.zip");
+
+    #[test]
+    fn test_zip_hash() -> Result<()> {
+        let mut stream = Cursor::new(ZIP_SAMPLE1);
+
+        let mut collection = CollectionHash {
+            uris: HashMap::new(),
+            alg: None,
+            zip_central_directory_hash: None,
+            base_path: None,
+            zip_central_directory_hash_range: None,
+        };
+        collection.gen_hash_from_zip_stream(&mut stream)?;
+
+        assert_eq!(
+            collection.zip_central_directory_hash,
+            Some(vec![
+                103, 27, 141, 219, 82, 200, 254, 44, 155, 221, 183, 146, 193, 94, 154, 77, 133, 93,
+                148, 88, 160, 123, 224, 170, 61, 140, 13, 2, 153, 86, 225, 231
+            ])
+        );
+        assert_eq!(
+            collection.zip_central_directory_hash_range,
+            Some(HashRange::new(369, 727))
+        );
+
+        assert_eq!(
+            collection.uris.get(Path::new("sample1/test1.txt")),
+            Some(&UriHashedDataMap {
+                hash: Some(vec![
+                    39, 147, 91, 240, 68, 172, 194, 43, 70, 207, 141, 151, 141, 239, 180, 17, 170,
+                    106, 248, 168, 169, 245, 207, 172, 29, 204, 80, 155, 37, 30, 186, 60
+                ]),
+                size: Some(47),
+                dc_format: Some("txt".to_string()),
+                data_types: None,
+                zip_hash_range: Some(HashRange::new(44, 47))
+            })
+        );
+        assert_eq!(
+            collection.uris.get(Path::new("sample1/test1/test1.txt")),
+            Some(&UriHashedDataMap {
+                hash: Some(vec![
+                    136, 103, 106, 251, 180, 19, 60, 244, 42, 171, 44, 215, 65, 252, 59, 127, 84,
+                    63, 175, 25, 6, 118, 200, 12, 188, 128, 67, 78, 249, 182, 242, 156
+                ]),
+                size: Some(57),
+                dc_format: Some("txt".to_string()),
+                data_types: None,
+                zip_hash_range: Some(HashRange::new(91, 57))
+            })
+        );
+        assert_eq!(
+            collection.uris.get(Path::new("sample1/test1/test2.txt")),
+            Some(&UriHashedDataMap {
+                hash: Some(vec![
+                    164, 100, 0, 41, 229, 201, 3, 228, 30, 254, 72, 205, 60, 70, 104, 78, 121, 21,
+                    187, 230, 19, 242, 52, 212, 181, 104, 99, 179, 177, 81, 150, 33
+                ]),
+                size: Some(53),
+                dc_format: Some("txt".to_string()),
+                data_types: None,
+                zip_hash_range: Some(HashRange::new(148, 53))
+            })
+        );
+        assert_eq!(
+            collection.uris.get(Path::new("sample1/test1/test3.txt")),
+            Some(&UriHashedDataMap {
+                hash: Some(vec![
+                    129, 96, 58, 105, 119, 67, 2, 71, 77, 151, 99, 201, 192, 32, 213, 77, 19, 22,
+                    106, 204, 158, 142, 176, 247, 251, 174, 145, 243, 12, 22, 151, 116
+                ]),
+                size: Some(68),
+                dc_format: Some("txt".to_string()),
+                data_types: None,
+                zip_hash_range: Some(HashRange::new(201, 68))
+            })
+        );
+        assert_eq!(
+            collection.uris.get(Path::new("sample1/test2.txt")),
+            Some(&UriHashedDataMap {
+                hash: Some(vec![
+                    118, 254, 231, 173, 246, 184, 45, 104, 69, 72, 23, 21, 177, 202, 184, 241, 162,
+                    36, 28, 55, 23, 62, 109, 143, 182, 233, 99, 144, 23, 139, 9, 118
+                ]),
+                size: Some(56),
+                dc_format: Some("txt".to_string()),
+                data_types: None,
+                zip_hash_range: Some(HashRange::new(313, 56))
+            })
+        );
+        assert_eq!(collection.uris.len(), 5);
+
+        Ok(())
+    }
+}

From c2feb82dd7b2c7c0325ab20ee4d9fa76ccb5c637 Mon Sep 17 00:00:00 2001
From: ok-nick <nick.libraries@gmail.com>
Date: Thu, 8 Aug 2024 12:20:23 -0400
Subject: [PATCH 19/21] Fix thiserror dependency conflict

---
 sdk/Cargo.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sdk/Cargo.toml b/sdk/Cargo.toml
index 240d4119f..18a1b62ac 100644
--- a/sdk/Cargo.toml
+++ b/sdk/Cargo.toml
@@ -120,7 +120,7 @@ serde_with = "3.4.0"
 serde-transcode = "1.1.1"
 sha2 = "0.10.2"
 tempfile = "3.10.1"
-thiserror = "1.0.61"
+thiserror = "1.0.63"
 treeline = "0.1.0"
 url = "2.2.2, <2.5.1" # Can't use 2.5.1 or newer until new license is reviewed.
 uuid = { version = "1.7.0", features = ["serde", "v4", "js"] }

From 97ebd569dbeacfd59fecfa67ac58eb0e02324a11 Mon Sep 17 00:00:00 2001
From: ok-nick <nick.libraries@gmail.com>
Date: Thu, 29 Aug 2024 13:27:31 -0400
Subject: [PATCH 20/21] Use latest zip crate (with fix)

---
 sdk/Cargo.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sdk/Cargo.toml b/sdk/Cargo.toml
index 18a1b62ac..3f57ab772 100644
--- a/sdk/Cargo.toml
+++ b/sdk/Cargo.toml
@@ -126,7 +126,7 @@ url = "2.2.2, <2.5.1" # Can't use 2.5.1 or newer until new license is reviewed.
 uuid = { version = "1.7.0", features = ["serde", "v4", "js"] }
 x509-parser = "0.15.1"
 x509-certificate = "0.21.0"
-zip = { git = "https://github.com/ok-nick/zip2.git", default-features = false }
+zip = "2.2.0"
 
 [target.'cfg(not(target_arch = "wasm32"))'.dependencies]
 ureq = "2.4.0"

From a5d05335856f1d8724353df11b569f22e91424e6 Mon Sep 17 00:00:00 2001
From: ok-nick <nick.libraries@gmail.com>
Date: Thu, 29 Aug 2024 13:41:23 -0400
Subject: [PATCH 21/21] Update log crate to fix dependency conflict

---
 make_test_images/Cargo.toml | 2 +-
 sdk/Cargo.toml              | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/make_test_images/Cargo.toml b/make_test_images/Cargo.toml
index e9cf79940..1e1d569e5 100644
--- a/make_test_images/Cargo.toml
+++ b/make_test_images/Cargo.toml
@@ -15,7 +15,7 @@ c2pa = { path = "../sdk", default-features = false, features = [
 	"file_io",
 ] }
 env_logger = "0.11"
-log = "0.4.8"
+log = "0.4.22"
 image = { version = "0.25.2", default-features = false, features = [
 	"jpeg",
 	"png",
diff --git a/sdk/Cargo.toml b/sdk/Cargo.toml
index 72213dd73..0fdebfdec 100644
--- a/sdk/Cargo.toml
+++ b/sdk/Cargo.toml
@@ -95,7 +95,7 @@ hex = "0.4.3"
 id3 = "=1.12.0"
 img-parts = "0.3.0"
 jfifdump = "0.5.1"
-log = "0.4.8"
+log = "0.4.22"
 lopdf = { version = "0.31.0", optional = true }
 lazy_static = "1.4.0"
 memchr = "2.7.4"