From 2c22931580bbbb526e0d67d04eb95dfc07ed8434 Mon Sep 17 00:00:00 2001 From: ok-nick Date: Mon, 8 Jul 2024 11:13:01 -0400 Subject: [PATCH 01/21] Foundation for ZIP support --- sdk/Cargo.toml | 4 +- sdk/src/asset_handlers/mod.rs | 1 + sdk/src/asset_handlers/zip_io.rs | 233 +++++++++++++++++++++++++++++++ sdk/src/builder.rs | 4 +- sdk/src/jumbf_io.rs | 7 +- 5 files changed, 244 insertions(+), 5 deletions(-) create mode 100644 sdk/src/asset_handlers/zip_io.rs diff --git a/sdk/Cargo.toml b/sdk/Cargo.toml index 798c3a6ef..aed7018c3 100644 --- a/sdk/Cargo.toml +++ b/sdk/Cargo.toml @@ -121,11 +121,11 @@ sha2 = "0.10.2" tempfile = "3.10.1" thiserror = "1.0.61" treeline = "0.1.0" -url = "2.2.2, <2.5.1" # Can't use 2.5.1 or newer until new license is reviewed. +url = "2.2.2, <2.5.1" # Can't use 2.5.1 or newer until new license is reviewed. uuid = { version = "1.3.1", features = ["serde", "v4", "wasm-bindgen"] } x509-parser = "0.15.1" x509-certificate = "0.19.0" -zip = { version = "0.6.6", default-features = false } +zip = { version = "2.1.3", default-features = false } [target.'cfg(not(target_arch = "wasm32"))'.dependencies] ureq = "2.4.0" diff --git a/sdk/src/asset_handlers/mod.rs b/sdk/src/asset_handlers/mod.rs index 8bc7bfba6..dc44b0523 100644 --- a/sdk/src/asset_handlers/mod.rs +++ b/sdk/src/asset_handlers/mod.rs @@ -19,6 +19,7 @@ pub mod png_io; pub mod riff_io; pub mod svg_io; pub mod tiff_io; +pub mod zip_io; #[cfg(feature = "pdf")] pub(crate) mod pdf; diff --git a/sdk/src/asset_handlers/zip_io.rs b/sdk/src/asset_handlers/zip_io.rs new file mode 100644 index 000000000..a411cdea6 --- /dev/null +++ b/sdk/src/asset_handlers/zip_io.rs @@ -0,0 +1,233 @@ +use std::{ + fs::{self, File}, + io::{self, Read}, + path::Path, +}; + +use tempfile::Builder; +use zip::{result::ZipResult, write::SimpleFileOptions, CompressionMethod, ZipArchive, ZipWriter}; + +use crate::{ + asset_io::{ + self, AssetIO, CAIReadWrapper, CAIReadWriteWrapper, CAIReader, CAIWriter, + HashObjectPositions, + 
}, + error::Result, + CAIRead, CAIReadWrite, Error, +}; + +pub struct ZipIO {} + +impl CAIWriter for ZipIO { + fn write_cai( + &self, + input_stream: &mut dyn CAIRead, + output_stream: &mut dyn CAIReadWrite, + mut store_bytes: &[u8], + ) -> Result<()> { + let mut writer = self + .writer(input_stream, output_stream) + .map_err(|_| Error::EmbeddingError)?; + + // TODO: what happens if the dir exists? + writer + .add_directory("META-INF", SimpleFileOptions::default()) + .map_err(|_| Error::EmbeddingError)?; + + writer + .start_file_from_path( + Path::new("META-INF/content_credential.c2pa"), + SimpleFileOptions::default().compression_method(CompressionMethod::Stored), + ) + .map_err(|_| Error::EmbeddingError)?; + io::copy(&mut store_bytes, &mut writer)?; + writer.finish().map_err(|_| Error::EmbeddingError)?; + + Ok(()) + } + + fn get_object_locations_from_stream( + &self, + _input_stream: &mut dyn CAIRead, + ) -> Result> { + // TODO: error? + Ok(Vec::new()) + } + + fn remove_cai_store_from_stream( + &self, + input_stream: &mut dyn CAIRead, + output_stream: &mut dyn CAIReadWrite, + ) -> Result<()> { + let mut writer = self + .writer(input_stream, output_stream) + .map_err(|_| Error::EmbeddingError)?; + + writer + .start_file_from_path( + Path::new("META-INF/content_credential.c2pa"), + SimpleFileOptions::default(), + ) + .map_err(|_| Error::EmbeddingError)?; + writer.abort_file().map_err(|_| Error::EmbeddingError)?; + writer.finish().map_err(|_| Error::EmbeddingError)?; + + Ok(()) + } +} + +impl CAIReader for ZipIO { + fn read_cai(&self, asset_reader: &mut dyn CAIRead) -> Result> { + let mut reader = self + .reader(asset_reader) + .map_err(|_| Error::JumbfNotFound)?; + + let index = reader + .index_for_path(Path::new("META-INF/content_credential.c2pa")) + .ok_or(Error::JumbfNotFound)?; + let mut file = reader.by_index(index).map_err(|_| Error::JumbfNotFound)?; + + let mut bytes = Vec::new(); + file.read_to_end(&mut bytes)?; + + Ok(bytes) + } + + fn read_xmp(&self, 
_asset_reader: &mut dyn CAIRead) -> Option { + None + } +} + +impl AssetIO for ZipIO { + fn new(_asset_type: &str) -> Self + where + Self: Sized, + { + ZipIO {} + } + + fn get_handler(&self, asset_type: &str) -> Box { + Box::new(ZipIO::new(asset_type)) + } + + fn get_reader(&self) -> &dyn CAIReader { + self + } + + fn get_writer(&self, asset_type: &str) -> Option> { + Some(Box::new(ZipIO::new(asset_type))) + } + + fn read_cai_store(&self, asset_path: &Path) -> Result> { + let mut f = File::open(asset_path)?; + self.read_cai(&mut f) + } + + fn save_cai_store(&self, asset_path: &Path, store_bytes: &[u8]) -> Result<()> { + let mut stream = fs::OpenOptions::new() + .read(true) + .open(asset_path) + .map_err(Error::IoError)?; + + let mut temp_file = Builder::new() + .prefix("c2pa_temp") + .rand_bytes(5) + .tempfile()?; + + self.write_cai(&mut stream, &mut temp_file, store_bytes)?; + + asset_io::rename_or_move(temp_file, asset_path) + } + + fn get_object_locations(&self, asset_path: &Path) -> Result> { + let mut f = std::fs::File::open(asset_path).map_err(|_err| Error::EmbeddingError)?; + self.get_object_locations_from_stream(&mut f) + } + + fn remove_cai_store(&self, asset_path: &Path) -> Result<()> { + let mut stream = fs::OpenOptions::new() + .read(true) + .open(asset_path) + .map_err(Error::IoError)?; + + let mut temp_file = Builder::new() + .prefix("c2pa_temp") + .rand_bytes(5) + .tempfile()?; + + self.remove_cai_store_from_stream(&mut stream, &mut temp_file)?; + + asset_io::rename_or_move(temp_file, asset_path) + } + + fn supported_types(&self) -> &[&str] { + &[ + // Zip + "zip", + "application/x-zip", + // EPUB + "epub", + "application/epub+zip", + // Office Open XML + "docx", + "application/vnd.openxmlformats-officedocument.wordprocessingml.document", + "xlsx", + "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", + "pptx", + "application/vnd.openxmlformats-officedocument.presentationml.presentation", + "docm", + 
"application/vnd.ms-word.document.macroEnabled.12", + "xlsm", + "application/vnd.ms-excel.sheet.macroEnabled.12", + "pptm", + "application/vnd.ms-powerpoint.presentation.macroEnabled.12", + // Open Document + "odt", + "application/vnd.oasis.opendocument.text", + "ods", + "application/vnd.oasis.opendocument.spreadsheet", + "odp", + "application/vnd.oasis.opendocument.presentation", + "odg", + "application/vnd.oasis.opendocument.graphics", + "ott", + "application/vnd.oasis.opendocument.text-template", + "ots", + "application/vnd.oasis.opendocument.spreadsheet-template", + "otp", + "application/vnd.oasis.opendocument.presentation-template", + "otg", + "application/vnd.oasis.opendocument.graphics-template", + // OpenXPS + "oxps", + "application/oxps", + ] + } +} + +impl ZipIO { + fn writer<'a>( + &self, + input_stream: &'a mut dyn CAIRead, + output_stream: &'a mut dyn CAIReadWrite, + ) -> ZipResult>> { + let mut writer = ZipWriter::new_append(CAIReadWriteWrapper { + reader_writer: output_stream, + })?; + + writer.merge_archive(ZipArchive::new(CAIReadWrapper { + reader: input_stream, + })?)?; + + Ok(writer) + } + + fn reader<'a>( + &self, + input_stream: &'a mut dyn CAIRead, + ) -> ZipResult>> { + ZipArchive::new(CAIReadWrapper { + reader: input_stream, + }) + } +} diff --git a/sdk/src/builder.rs b/sdk/src/builder.rs index 4e8005389..8eb4f1258 100644 --- a/sdk/src/builder.rs +++ b/sdk/src/builder.rs @@ -20,7 +20,7 @@ use async_generic::async_generic; use serde::{Deserialize, Serialize}; use serde_with::skip_serializing_none; use uuid::Uuid; -use zip::{write::FileOptions, ZipArchive, ZipWriter}; +use zip::{write::SimpleFileOptions, ZipArchive, ZipWriter}; use crate::{ assertion::AssertionBase, @@ -370,7 +370,7 @@ impl Builder { { let mut zip = ZipWriter::new(stream); let options = - FileOptions::default().compression_method(zip::CompressionMethod::Stored); + SimpleFileOptions::default().compression_method(zip::CompressionMethod::Stored); zip.start_file("manifest.json", 
options) .map_err(|e| Error::OtherError(Box::new(e)))?; zip.write_all(&serde_json::to_vec(self)?)?; diff --git a/sdk/src/jumbf_io.rs b/sdk/src/jumbf_io.rs index 6b23478e5..f96ac4d09 100644 --- a/sdk/src/jumbf_io.rs +++ b/sdk/src/jumbf_io.rs @@ -28,7 +28,7 @@ use crate::asset_handlers::pdf_io::PdfIO; use crate::{ asset_handlers::{ bmff_io::BmffIO, c2pa_io::C2paIO, jpeg_io::JpegIO, mp3_io::Mp3IO, png_io::PngIO, - riff_io::RiffIO, svg_io::SvgIO, tiff_io::TiffIO, + riff_io::RiffIO, svg_io::SvgIO, tiff_io::TiffIO, zip_io::ZipIO, }, asset_io::{AssetIO, CAIRead, CAIReadWrite, CAIReader, CAIWriter, HashObjectPositions}, error::{Error, Result}, @@ -48,6 +48,7 @@ lazy_static! { Box::new(SvgIO::new("")), Box::new(TiffIO::new("")), Box::new(Mp3IO::new("")), + Box::new(ZipIO::new("")), ]; let mut handler_map = HashMap::new(); @@ -76,6 +77,7 @@ lazy_static! { Box::new(SvgIO::new("")), Box::new(TiffIO::new("")), Box::new(Mp3IO::new("")), + Box::new(ZipIO::new("")), ]; let mut handler_map = HashMap::new(); @@ -364,6 +366,7 @@ pub mod tests { Box::new(TiffIO::new("")), Box::new(SvgIO::new("")), Box::new(Mp3IO::new("")), + Box::new(ZipIO::new("")), ]; // build handler map @@ -388,6 +391,7 @@ pub mod tests { Box::new(TiffIO::new("")), Box::new(SvgIO::new("")), Box::new(Mp3IO::new("")), + Box::new(ZipIO::new("")), ]; // build handler map @@ -405,6 +409,7 @@ pub mod tests { Box::new(JpegIO::new("")), Box::new(PngIO::new("")), Box::new(Mp3IO::new("")), + Box::new(ZipIO::new("")), Box::new(SvgIO::new("")), Box::new(RiffIO::new("")), ]; From a84411a2f5e93ab588fac92160f6688a8d547ce7 Mon Sep 17 00:00:00 2001 From: ok-nick Date: Mon, 8 Jul 2024 15:19:29 -0400 Subject: [PATCH 02/21] Foundation for collection data hash assertion --- sdk/src/assertions/collection_hash.rs | 79 +++++++++++++++++++++++++++ sdk/src/assertions/mod.rs | 3 + sdk/src/asset_handlers/zip_io.rs | 41 +++++++++++++- sdk/src/utils/hash_utils.rs | 2 + 4 files changed, 123 insertions(+), 2 deletions(-) create mode 100644 
sdk/src/assertions/collection_hash.rs diff --git a/sdk/src/assertions/collection_hash.rs b/sdk/src/assertions/collection_hash.rs new file mode 100644 index 000000000..305f0d005 --- /dev/null +++ b/sdk/src/assertions/collection_hash.rs @@ -0,0 +1,79 @@ +use std::io::{Read, Seek}; + +use serde::{Deserialize, Serialize}; + +use crate::{assertions::AssetType, asset_handlers::zip_io, hash_stream_by_alg, Error, Result}; + +#[derive(Serialize, Deserialize, Debug, PartialEq, Eq)] +pub struct CollectionHash { + pub uri_maps: Vec, + + #[serde(skip_serializing_if = "Option::is_none")] + pub alg: Option, + + #[serde(skip_serializing_if = "Option::is_none", with = "serde_bytes")] + pub zip_central_directory_hash: Option>, +} + +#[derive(Serialize, Deserialize, Debug, PartialEq, Eq)] +pub struct UriHashedDataMap { + pub uri: String, + + #[serde(with = "serde_bytes")] + pub hash: Vec, + + #[serde(skip_serializing_if = "Option::is_none")] + pub size: Option, + + #[serde(rename = "dc:format", skip_serializing_if = "Option::is_none")] + pub dc_format: Option, + + #[serde(skip_serializing_if = "Option::is_none")] + pub data_types: Option>, +} + +impl CollectionHash { + pub fn new(alg: String) -> Self { + CollectionHash { + uri_maps: Vec::new(), + alg: Some(alg), + zip_central_directory_hash: None, + } + } + + fn add_uri_map(&mut self, uri_map: UriHashedDataMap) { + self.uri_maps.push(uri_map); + } + + // TODO: support custom collection hashes + pub fn gen_hash_from_stream(&mut self, stream: &mut R) -> Result<()> + where + R: Read + Seek + ?Sized, + { + let alg = match self.alg { + Some(ref a) => a.clone(), + None => "sha256".to_string(), + }; + + let zip_central_directory_inclusions = zip_io::central_directory_inclusions(stream)?; + let zip_central_directory_hash = + hash_stream_by_alg(&alg, stream, Some(zip_central_directory_inclusions), false)?; + if zip_central_directory_hash.is_empty() { + return Err(Error::BadParam("could not generate data hash".to_string())); + } + 
self.zip_central_directory_hash = Some(zip_central_directory_hash); + + let uri_inclusions = zip_io::uri_inclusions(stream, &self.uri_maps)?; + for (i, uri_map) in self.uri_maps.iter_mut().enumerate() { + let hash = + hash_stream_by_alg(&alg, stream, Some(vec![uri_inclusions[i].clone()]), false)?; + if hash.is_empty() { + return Err(Error::BadParam("could not generate data hash".to_string())); + } + + uri_map.hash = hash; + } + + Ok(()) + } +} diff --git a/sdk/src/assertions/mod.rs b/sdk/src/assertions/mod.rs index dae13620e..8c71b3993 100644 --- a/sdk/src/assertions/mod.rs +++ b/sdk/src/assertions/mod.rs @@ -25,6 +25,9 @@ pub use box_hash::{BoxHash, BoxMap, C2PA_BOXHASH}; mod data_hash; pub use data_hash::DataHash; +mod collection_hash; +pub use collection_hash::{CollectionHash, UriHashedDataMap}; + mod creative_work; pub use creative_work::CreativeWork; diff --git a/sdk/src/asset_handlers/zip_io.rs b/sdk/src/asset_handlers/zip_io.rs index a411cdea6..18c59fed4 100644 --- a/sdk/src/asset_handlers/zip_io.rs +++ b/sdk/src/asset_handlers/zip_io.rs @@ -1,6 +1,6 @@ use std::{ fs::{self, File}, - io::{self, Read}, + io::{self, Read, Seek}, path::Path, }; @@ -8,12 +8,13 @@ use tempfile::Builder; use zip::{result::ZipResult, write::SimpleFileOptions, CompressionMethod, ZipArchive, ZipWriter}; use crate::{ + assertions::UriHashedDataMap, asset_io::{ self, AssetIO, CAIReadWrapper, CAIReadWriteWrapper, CAIReader, CAIWriter, HashObjectPositions, }, error::Result, - CAIRead, CAIReadWrite, Error, + CAIRead, CAIReadWrite, Error, HashRange, }; pub struct ZipIO {} @@ -231,3 +232,39 @@ impl ZipIO { }) } } + +pub fn central_directory_inclusions(reader: &mut R) -> Result> +where + R: Read + Seek + ?Sized, +{ + let _reader = ZipArchive::new(reader).map_err(|_| Error::JumbfNotFound)?; + + // TODO: https://github.com/zip-rs/zip2/pull/71 + // or + // https://gitlab.com/xMAC94x/zip-core (https://github.com/zip-rs/zip2/issues/204) + + todo!() +} + +pub fn uri_inclusions(reader: &mut R, 
uri_maps: &[UriHashedDataMap]) -> Result> +where + R: Read + Seek + ?Sized, +{ + let mut reader = ZipArchive::new(reader).map_err(|_| Error::JumbfNotFound)?; + + let mut ranges = Vec::new(); + for uri_map in uri_maps { + let index = reader + .index_for_path(Path::new(&uri_map.uri)) + .ok_or(Error::JumbfNotFound)?; + let file = reader.by_index(index).map_err(|_| Error::JumbfNotFound)?; + // TODO: hash from header or data? does compressed_size include header? + // and fix error type + ranges.push(HashRange::new( + usize::try_from(file.header_start()).map_err(|_| Error::JumbfNotFound)?, + usize::try_from(file.compressed_size()).map_err(|_| Error::JumbfNotFound)?, + )); + } + + Ok(ranges) +} diff --git a/sdk/src/utils/hash_utils.rs b/sdk/src/utils/hash_utils.rs index 241769204..056b9f4dd 100644 --- a/sdk/src/utils/hash_utils.rs +++ b/sdk/src/utils/hash_utils.rs @@ -206,6 +206,8 @@ pub fn hash_asset_by_alg_with_inclusions( to_be_hashed: [IIIIIXXXXXMIIIIIMXXXXXMXXXXIII...III] The data is again split into range sets breaking at the exclusion points and now also the markers. 
+ + // TODO: describe collection hash */ pub fn hash_stream_by_alg( alg: &str, From c03d65393bc330e96b227aa5e2b9b840c04df11e Mon Sep 17 00:00:00 2001 From: ok-nick Date: Tue, 9 Jul 2024 10:50:42 -0400 Subject: [PATCH 03/21] Collection assertion hash resolver --- sdk/src/assertions/collection_hash.rs | 60 +++++++++++++++++++-------- sdk/src/assertions/mod.rs | 2 +- sdk/src/asset_handlers/zip_io.rs | 53 ++++++++++++++--------- 3 files changed, 78 insertions(+), 37 deletions(-) diff --git a/sdk/src/assertions/collection_hash.rs b/sdk/src/assertions/collection_hash.rs index 305f0d005..7c4695f75 100644 --- a/sdk/src/assertions/collection_hash.rs +++ b/sdk/src/assertions/collection_hash.rs @@ -2,7 +2,11 @@ use std::io::{Read, Seek}; use serde::{Deserialize, Serialize}; -use crate::{assertions::AssetType, asset_handlers::zip_io, hash_stream_by_alg, Error, Result}; +use crate::{ + assertions::AssetType, + asset_handlers::zip_io::{self, ZipHashResolver}, + hash_stream_by_alg, CAIRead, Error, HashRange, Result, +}; #[derive(Serialize, Deserialize, Debug, PartialEq, Eq)] pub struct CollectionHash { @@ -41,19 +45,34 @@ impl CollectionHash { } } - fn add_uri_map(&mut self, uri_map: UriHashedDataMap) { + pub fn add_uri_map(&mut self, uri_map: UriHashedDataMap) { self.uri_maps.push(uri_map); } - // TODO: support custom collection hashes - pub fn gen_hash_from_stream(&mut self, stream: &mut R) -> Result<()> + pub fn gen_hash_from_stream(&mut self, stream: &mut R, mut resolver: T) -> Result<()> where R: Read + Seek + ?Sized, + T: UriHashResolver, { - let alg = match self.alg { - Some(ref a) => a.clone(), - None => "sha256".to_string(), - }; + let alg = self.alg(); + for uri_map in &mut self.uri_maps { + let inclusions = resolver.resolve(uri_map); + let hash = hash_stream_by_alg(&alg, stream, Some(inclusions), false)?; + if hash.is_empty() { + return Err(Error::BadParam("could not generate data hash".to_string())); + } + + uri_map.hash = hash; + } + + Ok(()) + } + + pub fn 
gen_hash_from_zip_stream(&mut self, stream: &mut R) -> Result<()> + where + R: Read + Seek + ?Sized, + { + let alg = self.alg(); let zip_central_directory_inclusions = zip_io::central_directory_inclusions(stream)?; let zip_central_directory_hash = @@ -63,17 +82,24 @@ impl CollectionHash { } self.zip_central_directory_hash = Some(zip_central_directory_hash); - let uri_inclusions = zip_io::uri_inclusions(stream, &self.uri_maps)?; - for (i, uri_map) in self.uri_maps.iter_mut().enumerate() { - let hash = - hash_stream_by_alg(&alg, stream, Some(vec![uri_inclusions[i].clone()]), false)?; - if hash.is_empty() { - return Err(Error::BadParam("could not generate data hash".to_string())); - } + let resolver = ZipHashResolver::new(stream, &self.uri_maps)?; + self.gen_hash_from_stream(stream, resolver)?; - uri_map.hash = hash; - } + Ok(()) + } + pub fn verify_stream_hash(&self, reader: &mut dyn CAIRead, alg: Option<&str>) -> Result<()> { Ok(()) } + + fn alg(&self) -> String { + match self.alg { + Some(ref a) => a.clone(), + None => "sha256".to_string(), + } + } +} + +pub trait UriHashResolver { + fn resolve(&mut self, uri_map: &UriHashedDataMap) -> Vec; } diff --git a/sdk/src/assertions/mod.rs b/sdk/src/assertions/mod.rs index 8c71b3993..067c893c1 100644 --- a/sdk/src/assertions/mod.rs +++ b/sdk/src/assertions/mod.rs @@ -26,7 +26,7 @@ mod data_hash; pub use data_hash::DataHash; mod collection_hash; -pub use collection_hash::{CollectionHash, UriHashedDataMap}; +pub use collection_hash::{CollectionHash, UriHashResolver, UriHashedDataMap}; mod creative_work; pub use creative_work::CreativeWork; diff --git a/sdk/src/asset_handlers/zip_io.rs b/sdk/src/asset_handlers/zip_io.rs index 18c59fed4..b81cb03b8 100644 --- a/sdk/src/asset_handlers/zip_io.rs +++ b/sdk/src/asset_handlers/zip_io.rs @@ -8,7 +8,7 @@ use tempfile::Builder; use zip::{result::ZipResult, write::SimpleFileOptions, CompressionMethod, ZipArchive, ZipWriter}; use crate::{ - assertions::UriHashedDataMap, + 
assertions::{UriHashResolver, UriHashedDataMap}, asset_io::{ self, AssetIO, CAIReadWrapper, CAIReadWriteWrapper, CAIReader, CAIWriter, HashObjectPositions, @@ -246,25 +246,40 @@ where todo!() } -pub fn uri_inclusions(reader: &mut R, uri_maps: &[UriHashedDataMap]) -> Result> -where - R: Read + Seek + ?Sized, -{ - let mut reader = ZipArchive::new(reader).map_err(|_| Error::JumbfNotFound)?; +pub struct ZipHashResolver { + ranges: Vec, + i: usize, +} - let mut ranges = Vec::new(); - for uri_map in uri_maps { - let index = reader - .index_for_path(Path::new(&uri_map.uri)) - .ok_or(Error::JumbfNotFound)?; - let file = reader.by_index(index).map_err(|_| Error::JumbfNotFound)?; - // TODO: hash from header or data? does compressed_size include header? - // and fix error type - ranges.push(HashRange::new( - usize::try_from(file.header_start()).map_err(|_| Error::JumbfNotFound)?, - usize::try_from(file.compressed_size()).map_err(|_| Error::JumbfNotFound)?, - )); +impl ZipHashResolver { + pub fn new( + stream: &mut R, + uri_maps: &[UriHashedDataMap], + ) -> Result { + let mut reader = ZipArchive::new(stream).map_err(|_| Error::JumbfNotFound)?; + + let mut ranges = Vec::new(); + for uri_map in uri_maps { + let index = reader + .index_for_path(Path::new(&uri_map.uri)) + .ok_or(Error::JumbfNotFound)?; + let file = reader.by_index(index).map_err(|_| Error::JumbfNotFound)?; + // TODO: hash from header or data? does compressed_size include header? 
+ // and fix error type + ranges.push(HashRange::new( + usize::try_from(file.header_start()).map_err(|_| Error::JumbfNotFound)?, + usize::try_from(file.compressed_size()).map_err(|_| Error::JumbfNotFound)?, + )); + } + + Ok(Self { ranges, i: 0 }) } +} - Ok(ranges) +impl UriHashResolver for ZipHashResolver { + fn resolve(&mut self, _uri_map: &UriHashedDataMap) -> Vec { + let range = self.ranges[self.i].clone(); + self.i += 1; + vec![range] + } } From 9e4af21033cf5121ecf6dca849bf4777b00315de Mon Sep 17 00:00:00 2001 From: ok-nick Date: Tue, 9 Jul 2024 14:36:53 -0400 Subject: [PATCH 04/21] Rework collection hash assertion --- sdk/src/assertions/collection_hash.rs | 162 +++++++++++++++++++++----- sdk/src/assertions/mod.rs | 2 +- sdk/src/asset_handlers/zip_io.rs | 64 +++++----- 3 files changed, 164 insertions(+), 64 deletions(-) diff --git a/sdk/src/assertions/collection_hash.rs b/sdk/src/assertions/collection_hash.rs index 7c4695f75..f183d6927 100644 --- a/sdk/src/assertions/collection_hash.rs +++ b/sdk/src/assertions/collection_hash.rs @@ -1,16 +1,19 @@ -use std::io::{Read, Seek}; +use std::{ + fs::File, + io::{Read, Seek}, + path::{Path, PathBuf}, +}; use serde::{Deserialize, Serialize}; use crate::{ - assertions::AssetType, - asset_handlers::zip_io::{self, ZipHashResolver}, - hash_stream_by_alg, CAIRead, Error, HashRange, Result, + assertions::AssetType, asset_handlers::zip_io, hash_stream_by_alg, + hash_utils::verify_stream_by_alg, Error, HashRange, Result, }; #[derive(Serialize, Deserialize, Debug, PartialEq, Eq)] pub struct CollectionHash { - pub uri_maps: Vec, + pub uris: Vec, #[serde(skip_serializing_if = "Option::is_none")] pub alg: Option, @@ -21,7 +24,7 @@ pub struct CollectionHash { #[derive(Serialize, Deserialize, Debug, PartialEq, Eq)] pub struct UriHashedDataMap { - pub uri: String, + pub uri: PathBuf, #[serde(with = "serde_bytes")] pub hash: Vec, @@ -34,45 +37,63 @@ pub struct UriHashedDataMap { #[serde(skip_serializing_if = "Option::is_none")] pub 
data_types: Option>, + + // When parsing zips we can cache the hash ranges as well in one shot. + #[serde(skip)] + pub(crate) zip_inclusion: Option, } impl CollectionHash { pub fn new(alg: String) -> Self { CollectionHash { - uri_maps: Vec::new(), + uris: Vec::new(), alg: Some(alg), zip_central_directory_hash: None, } } pub fn add_uri_map(&mut self, uri_map: UriHashedDataMap) { - self.uri_maps.push(uri_map); + self.uris.push(uri_map); } - pub fn gen_hash_from_stream(&mut self, stream: &mut R, mut resolver: T) -> Result<()> + // TODO: is it safe to assume self.uris includes the stream that's being embedded into? or should + // we pass it as a param? + pub fn gen_hash(&mut self, base_path: &Path) -> Result<()> where R: Read + Seek + ?Sized, - T: UriHashResolver, { - let alg = self.alg(); - for uri_map in &mut self.uri_maps { - let inclusions = resolver.resolve(uri_map); - let hash = hash_stream_by_alg(&alg, stream, Some(inclusions), false)?; - if hash.is_empty() { - return Err(Error::BadParam("could not generate data hash".to_string())); - } - - uri_map.hash = hash; + let alg = self.alg().to_owned(); + for uri_map in &mut self.uris { + let path = base_path.join(&uri_map.uri); + let mut file = File::open(path)?; + let file_len = file.metadata()?.len(); + + uri_map.hash = hash_stream_by_alg( + &alg, + &mut file, + // TODO: temp unwrap + #[allow(clippy::unwrap_used)] + Some(vec![HashRange::new(0, usize::try_from(file_len).unwrap())]), + false, + )?; } Ok(()) } + pub fn gen_uris_from_zip_stream(&mut self, stream: &mut R) -> Result<()> + where + R: Read + Seek + ?Sized, + { + self.uris = zip_io::uri_inclusions(stream)?; + Ok(()) + } + pub fn gen_hash_from_zip_stream(&mut self, stream: &mut R) -> Result<()> where R: Read + Seek + ?Sized, { - let alg = self.alg(); + let alg = self.alg().to_owned(); let zip_central_directory_inclusions = zip_io::central_directory_inclusions(stream)?; let zip_central_directory_hash = @@ -82,24 +103,105 @@ impl CollectionHash { } 
self.zip_central_directory_hash = Some(zip_central_directory_hash); - let resolver = ZipHashResolver::new(stream, &self.uri_maps)?; - self.gen_hash_from_stream(stream, resolver)?; + for uri_map in self.uris.iter_mut() { + match &uri_map.zip_inclusion { + Some(inclusion) => { + let hash = + hash_stream_by_alg(&alg, stream, Some(vec![inclusion.clone()]), false)?; + if hash.is_empty() { + return Err(Error::BadParam("could not generate data hash".to_string())); + } + + uri_map.hash = hash; + } + None => { + return Err(Error::BadParam( + "must generate zip stream uris before generating hashes".to_owned(), + )) + } + } + } Ok(()) } - pub fn verify_stream_hash(&self, reader: &mut dyn CAIRead, alg: Option<&str>) -> Result<()> { + pub fn verify_stream_hash(&self, alg: Option<&str>, base_path: &Path) -> Result<()> + where + R: Read + Seek + ?Sized, + { + let alg = alg.unwrap_or_else(|| self.alg()); + for uri_map in &self.uris { + let path = base_path.join(&uri_map.uri); + let mut file = File::open(&path)?; + let file_len = file.metadata()?.len(); + + if !verify_stream_by_alg( + alg, + &uri_map.hash, + &mut file, + // TODO: temp unwrap + #[allow(clippy::unwrap_used)] + Some(vec![HashRange::new(0, usize::try_from(file_len).unwrap())]), + false, + ) { + return Err(Error::HashMismatch(format!( + "hash for {} does not match", + path.display() + ))); + } + } + Ok(()) } - fn alg(&self) -> String { - match self.alg { - Some(ref a) => a.clone(), - None => "sha256".to_string(), + pub fn verify_zip_stream_hash(&self, stream: &mut R, alg: Option<&str>) -> Result<()> + where + R: Read + Seek + ?Sized, + { + let alg = alg.unwrap_or_else(|| self.alg()); + let central_directory_hash = match &self.zip_central_directory_hash { + Some(hash) => Ok(hash), + None => Err(Error::BadParam( + "Missing zip central directory hash".to_owned(), + )), + }?; + let zip_central_directory_inclusions = zip_io::central_directory_inclusions(stream)?; + if !verify_stream_by_alg( + alg, + central_directory_hash, + 
stream, + Some(zip_central_directory_inclusions), + false, + ) { + return Err(Error::HashMismatch( + "Hashes do not match for zip central directory".to_owned(), + )); + } + + // TODO: we don't need to generate new uri maps, only ranges, and we only need the ranges for the + // files that exist in the uri_map, or should we always do all of them? + let uris = zip_io::uri_inclusions(stream)?; + for (uri_map, uri_map_inclusion) in self.uris.iter().zip(uris) { + if !verify_stream_by_alg( + alg, + &uri_map.hash, + stream, + // Safe to unwrap because zip_io::uri_inclusions guarantees this field to be valid. + #[allow(clippy::unwrap_used)] + Some(vec![uri_map_inclusion.zip_inclusion.unwrap()]), + false, + ) { + return Err(Error::HashMismatch(format!( + "hash for {} does not match", + uri_map.uri.display() + ))); + } } + + Ok(()) } -} -pub trait UriHashResolver { - fn resolve(&mut self, uri_map: &UriHashedDataMap) -> Vec; + fn alg(&self) -> &str { + self.alg.as_deref().unwrap_or("sha256") + } } diff --git a/sdk/src/assertions/mod.rs b/sdk/src/assertions/mod.rs index 067c893c1..8c71b3993 100644 --- a/sdk/src/assertions/mod.rs +++ b/sdk/src/assertions/mod.rs @@ -26,7 +26,7 @@ mod data_hash; pub use data_hash::DataHash; mod collection_hash; -pub use collection_hash::{CollectionHash, UriHashResolver, UriHashedDataMap}; +pub use collection_hash::{CollectionHash, UriHashedDataMap}; mod creative_work; pub use creative_work::CreativeWork; diff --git a/sdk/src/asset_handlers/zip_io.rs b/sdk/src/asset_handlers/zip_io.rs index b81cb03b8..34eeeefae 100644 --- a/sdk/src/asset_handlers/zip_io.rs +++ b/sdk/src/asset_handlers/zip_io.rs @@ -8,7 +8,7 @@ use tempfile::Builder; use zip::{result::ZipResult, write::SimpleFileOptions, CompressionMethod, ZipArchive, ZipWriter}; use crate::{ - assertions::{UriHashResolver, UriHashedDataMap}, + assertions::UriHashedDataMap, asset_io::{ self, AssetIO, CAIReadWrapper, CAIReadWriteWrapper, CAIReader, CAIWriter, HashObjectPositions, @@ -246,40 +246,38 
@@ where todo!() } -pub struct ZipHashResolver { - ranges: Vec, - i: usize, -} +pub fn uri_inclusions(stream: &mut R) -> Result> +where + R: Read + Seek + ?Sized, +{ + let mut reader = ZipArchive::new(stream).map_err(|_| Error::JumbfNotFound)?; -impl ZipHashResolver { - pub fn new( - stream: &mut R, - uri_maps: &[UriHashedDataMap], - ) -> Result { - let mut reader = ZipArchive::new(stream).map_err(|_| Error::JumbfNotFound)?; - - let mut ranges = Vec::new(); - for uri_map in uri_maps { - let index = reader - .index_for_path(Path::new(&uri_map.uri)) - .ok_or(Error::JumbfNotFound)?; - let file = reader.by_index(index).map_err(|_| Error::JumbfNotFound)?; - // TODO: hash from header or data? does compressed_size include header? - // and fix error type - ranges.push(HashRange::new( - usize::try_from(file.header_start()).map_err(|_| Error::JumbfNotFound)?, - usize::try_from(file.compressed_size()).map_err(|_| Error::JumbfNotFound)?, - )); - } + let mut ranges = Vec::new(); + let file_names: Vec = reader.file_names().map(|n| n.to_owned()).collect(); + for file_name in file_names { + let file = reader + .by_name(&file_name) + .map_err(|_| Error::JumbfNotFound)?; - Ok(Self { ranges, i: 0 }) + if !file.is_dir() { + ranges.push(UriHashedDataMap { + // TODO: temp unwrap + #[allow(clippy::unwrap_used)] + uri: file.enclosed_name().unwrap(), + hash: Vec::new(), + // TODO: same here + size: Some(file.header_start() - file.compressed_size()), + dc_format: None, // TODO + data_types: None, // TODO + // TODO: hash from header or data? does compressed_size include header? 
+ // and fix error type + zip_inclusion: Some(HashRange::new( + usize::try_from(file.header_start()).map_err(|_| Error::JumbfNotFound)?, + usize::try_from(file.compressed_size()).map_err(|_| Error::JumbfNotFound)?, + )), + }); + } } -} -impl UriHashResolver for ZipHashResolver { - fn resolve(&mut self, _uri_map: &UriHashedDataMap) -> Vec { - let range = self.ranges[self.i].clone(); - self.i += 1; - vec![range] - } + Ok(ranges) } From 6559e6fe6c093b6fe85ef86eb6aa18b93bd65f4b Mon Sep 17 00:00:00 2001 From: ok-nick Date: Tue, 9 Jul 2024 15:06:50 -0400 Subject: [PATCH 05/21] More collection assertion validation --- sdk/src/assertions/collection_hash.rs | 47 ++++++++------------------- sdk/src/asset_handlers/zip_io.rs | 39 +++++++++++++++++----- 2 files changed, 43 insertions(+), 43 deletions(-) diff --git a/sdk/src/assertions/collection_hash.rs b/sdk/src/assertions/collection_hash.rs index f183d6927..490c8ffab 100644 --- a/sdk/src/assertions/collection_hash.rs +++ b/sdk/src/assertions/collection_hash.rs @@ -37,10 +37,6 @@ pub struct UriHashedDataMap { #[serde(skip_serializing_if = "Option::is_none")] pub data_types: Option>, - - // When parsing zips we can cache the hash ranges as well in one shot. - #[serde(skip)] - pub(crate) zip_inclusion: Option, } impl CollectionHash { @@ -81,11 +77,13 @@ impl CollectionHash { Ok(()) } + // We overwrite all URIs with all existing URIs in the ZIP because all URIs in the ZIP represent all + // possible valid URIs — we don't want duplicates! 
pub fn gen_uris_from_zip_stream(&mut self, stream: &mut R) -> Result<()> where R: Read + Seek + ?Sized, { - self.uris = zip_io::uri_inclusions(stream)?; + self.uris = zip_io::uri_maps(stream)?; Ok(()) } @@ -103,23 +101,14 @@ impl CollectionHash { } self.zip_central_directory_hash = Some(zip_central_directory_hash); - for uri_map in self.uris.iter_mut() { - match &uri_map.zip_inclusion { - Some(inclusion) => { - let hash = - hash_stream_by_alg(&alg, stream, Some(vec![inclusion.clone()]), false)?; - if hash.is_empty() { - return Err(Error::BadParam("could not generate data hash".to_string())); - } - - uri_map.hash = hash; - } - None => { - return Err(Error::BadParam( - "must generate zip stream uris before generating hashes".to_owned(), - )) - } + let hash_ranges = zip_io::uri_inclusions(stream, &self.uris)?; + for (uri_map, hash_range) in self.uris.iter_mut().zip(hash_ranges) { + let hash = hash_stream_by_alg(&alg, stream, Some(vec![hash_range]), false)?; + if hash.is_empty() { + return Err(Error::BadParam("could not generate data hash".to_string())); } + + uri_map.hash = hash; } Ok(()) @@ -178,19 +167,9 @@ impl CollectionHash { )); } - // TODO: we don't need to generate new uri maps, only ranges, and we only need the ranges for the - // files that exist in the uri_map, or should we always do all of them? - let uris = zip_io::uri_inclusions(stream)?; - for (uri_map, uri_map_inclusion) in self.uris.iter().zip(uris) { - if !verify_stream_by_alg( - alg, - &uri_map.hash, - stream, - // Safe to unwrap because zip_io::uri_inclusions guarantees this field to be valid. 
- #[allow(clippy::unwrap_used)] - Some(vec![uri_map_inclusion.zip_inclusion.unwrap()]), - false, - ) { + let hash_ranges = zip_io::uri_inclusions(stream, &self.uris)?; + for (uri_map, hash_range) in self.uris.iter().zip(hash_ranges) { + if !verify_stream_by_alg(alg, &uri_map.hash, stream, Some(vec![hash_range]), false) { return Err(Error::HashMismatch(format!( "hash for {} does not match", uri_map.uri.display() diff --git a/sdk/src/asset_handlers/zip_io.rs b/sdk/src/asset_handlers/zip_io.rs index 34eeeefae..3f0c10c37 100644 --- a/sdk/src/asset_handlers/zip_io.rs +++ b/sdk/src/asset_handlers/zip_io.rs @@ -233,6 +233,7 @@ impl ZipIO { } } +// TODO: probably doesn't need to return a vec pub fn central_directory_inclusions(reader: &mut R) -> Result> where R: Read + Seek + ?Sized, @@ -246,13 +247,13 @@ where todo!() } -pub fn uri_inclusions(stream: &mut R) -> Result> +pub fn uri_maps(stream: &mut R) -> Result> where R: Read + Seek + ?Sized, { let mut reader = ZipArchive::new(stream).map_err(|_| Error::JumbfNotFound)?; - let mut ranges = Vec::new(); + let mut uri_maps = Vec::new(); let file_names: Vec = reader.file_names().map(|n| n.to_owned()).collect(); for file_name in file_names { let file = reader @@ -260,7 +261,7 @@ where .map_err(|_| Error::JumbfNotFound)?; if !file.is_dir() { - ranges.push(UriHashedDataMap { + uri_maps.push(UriHashedDataMap { // TODO: temp unwrap #[allow(clippy::unwrap_used)] uri: file.enclosed_name().unwrap(), @@ -269,15 +270,35 @@ where size: Some(file.header_start() - file.compressed_size()), dc_format: None, // TODO data_types: None, // TODO - // TODO: hash from header or data? does compressed_size include header? 
- // and fix error type - zip_inclusion: Some(HashRange::new( - usize::try_from(file.header_start()).map_err(|_| Error::JumbfNotFound)?, - usize::try_from(file.compressed_size()).map_err(|_| Error::JumbfNotFound)?, - )), }); } } + Ok(uri_maps) +} + +pub fn uri_inclusions(stream: &mut R, uri_maps: &[UriHashedDataMap]) -> Result> +where + R: Read + Seek + ?Sized, +{ + let mut reader = ZipArchive::new(stream).map_err(|_| Error::JumbfNotFound)?; + + let mut ranges = Vec::new(); + for uri_map in uri_maps { + let index = reader + .index_for_path(&uri_map.uri) + .ok_or(Error::JumbfNotFound)?; + let file = reader.by_index(index).map_err(|_| Error::JumbfNotFound)?; + + if !file.is_dir() { + // TODO: hash from header or data? does compressed_size include header? + // and fix error type + ranges.push(HashRange::new( + usize::try_from(file.header_start()).map_err(|_| Error::JumbfNotFound)?, + usize::try_from(file.compressed_size()).map_err(|_| Error::JumbfNotFound)?, + )); + } + } + Ok(ranges) } From ac1a12d1c1373030c591e1502fb11ef7a51b2550 Mon Sep 17 00:00:00 2001 From: ok-nick Date: Tue, 9 Jul 2024 15:25:21 -0400 Subject: [PATCH 06/21] Fix ZIP unit tests --- sdk/src/assertions/collection_hash.rs | 58 +++++++++++++-------------- sdk/src/asset_handlers/zip_io.rs | 24 +++++------ 2 files changed, 41 insertions(+), 41 deletions(-) diff --git a/sdk/src/assertions/collection_hash.rs b/sdk/src/assertions/collection_hash.rs index 490c8ffab..87a315d5c 100644 --- a/sdk/src/assertions/collection_hash.rs +++ b/sdk/src/assertions/collection_hash.rs @@ -77,6 +77,35 @@ impl CollectionHash { Ok(()) } + pub fn verify_hash(&self, alg: Option<&str>, base_path: &Path) -> Result<()> + where + R: Read + Seek + ?Sized, + { + let alg = alg.unwrap_or_else(|| self.alg()); + for uri_map in &self.uris { + let path = base_path.join(&uri_map.uri); + let mut file = File::open(&path)?; + let file_len = file.metadata()?.len(); + + if !verify_stream_by_alg( + alg, + &uri_map.hash, + &mut file, + // TODO: 
temp unwrap + #[allow(clippy::unwrap_used)] + Some(vec![HashRange::new(0, usize::try_from(file_len).unwrap())]), + false, + ) { + return Err(Error::HashMismatch(format!( + "hash for {} does not match", + path.display() + ))); + } + } + + Ok(()) + } + // We overwrite all URIs with all existing URIs in the ZIP because all URIs in the ZIP represent all // possible valid URIs — we don't want duplicates! pub fn gen_uris_from_zip_stream(&mut self, stream: &mut R) -> Result<()> @@ -114,35 +143,6 @@ impl CollectionHash { Ok(()) } - pub fn verify_stream_hash(&self, alg: Option<&str>, base_path: &Path) -> Result<()> - where - R: Read + Seek + ?Sized, - { - let alg = alg.unwrap_or_else(|| self.alg()); - for uri_map in &self.uris { - let path = base_path.join(&uri_map.uri); - let mut file = File::open(&path)?; - let file_len = file.metadata()?.len(); - - if !verify_stream_by_alg( - alg, - &uri_map.hash, - &mut file, - // TODO: temp unwrap - #[allow(clippy::unwrap_used)] - Some(vec![HashRange::new(0, usize::try_from(file_len).unwrap())]), - false, - ) { - return Err(Error::HashMismatch(format!( - "hash for {} does not match", - path.display() - ))); - } - } - - Ok(()) - } - pub fn verify_zip_stream_hash(&self, stream: &mut R, alg: Option<&str>) -> Result<()> where R: Read + Seek + ?Sized, diff --git a/sdk/src/asset_handlers/zip_io.rs b/sdk/src/asset_handlers/zip_io.rs index 3f0c10c37..cff59f566 100644 --- a/sdk/src/asset_handlers/zip_io.rs +++ b/sdk/src/asset_handlers/zip_io.rs @@ -177,11 +177,11 @@ impl AssetIO for ZipIO { "pptx", "application/vnd.openxmlformats-officedocument.presentationml.presentation", "docm", - "application/vnd.ms-word.document.macroEnabled.12", + "application/vnd.ms-word.document.macroenabled.12", "xlsm", - "application/vnd.ms-excel.sheet.macroEnabled.12", + "application/vnd.ms-excel.sheet.macroenabled.12", "pptm", - "application/vnd.ms-powerpoint.presentation.macroEnabled.12", + "application/vnd.ms-powerpoint.presentation.macroenabled.12", // Open 
Document "odt", "application/vnd.oasis.opendocument.text", @@ -207,6 +207,15 @@ impl AssetIO for ZipIO { } impl ZipIO { + fn reader<'a>( + &self, + input_stream: &'a mut dyn CAIRead, + ) -> ZipResult>> { + ZipArchive::new(CAIReadWrapper { + reader: input_stream, + }) + } + fn writer<'a>( &self, input_stream: &'a mut dyn CAIRead, @@ -222,15 +231,6 @@ impl ZipIO { Ok(writer) } - - fn reader<'a>( - &self, - input_stream: &'a mut dyn CAIRead, - ) -> ZipResult>> { - ZipArchive::new(CAIReadWrapper { - reader: input_stream, - }) - } } // TODO: probably doesn't need to return a vec From d1f3eab973492ec12e16d5bbf882ca68808d8711 Mon Sep 17 00:00:00 2001 From: ok-nick Date: Tue, 9 Jul 2024 15:30:39 -0400 Subject: [PATCH 07/21] Update memchr dep --- sdk/Cargo.toml | 2 +- sdk/tests/fixtures/sample1.zip | Bin 0 -> 2934 bytes 2 files changed, 1 insertion(+), 1 deletion(-) create mode 100644 sdk/tests/fixtures/sample1.zip diff --git a/sdk/Cargo.toml b/sdk/Cargo.toml index aed7018c3..16f6a5a9d 100644 --- a/sdk/Cargo.toml +++ b/sdk/Cargo.toml @@ -96,7 +96,7 @@ jfifdump = "0.5.1" log = "0.4.8" lopdf = { version = "0.31.0", optional = true } lazy_static = "1.4.0" -memchr = "2.7.1" +memchr = "2.7.4" multibase = "0.9.0" multihash = "0.11.4" mp4 = "0.13.0" diff --git a/sdk/tests/fixtures/sample1.zip b/sdk/tests/fixtures/sample1.zip new file mode 100644 index 0000000000000000000000000000000000000000..29dec9efab522515c1df0df5b65f4b12f2b2a852 GIT binary patch literal 2934 zcmWIWW@Zs#0D+e3ml0qFlwe~}U?@o~E-};(4dG>A_w4UY1K~&@F0J5ZU}X8u$iM*B z8vr(ggMovg2+0fH^Y+1_p6VmmKVbxtRkG7^f6kq66?b2LJG++Mx9+-`yS&bcUzN4tk4qP?)nAvliDPs8 zmgA2CYnJg`!Et$T%}dS{PvqAQB34dgcd8qoze;xZ$8g z4N8k38qIJ}S`5@^6b5^N`Ku#4C)gb1)J_+#i2Voae2U-u*1>v&i?g zd$U==QOO-S-ysX~K}vUHQoab7$pJG=Y7uaa%|MsrRE5=}9v;U)6Sw5-d zd&2Kq?|mS9-MH5O>$)AiX8+eJH|5Jt6q`P4%CAS|f5nfT){Z;3by^m5y8!gNjiYSkmYMHxql6i=~1O@J2NpxoQBFo-nYaaXFAle$@`Lhl?3^MT4-X z7?|QA0Rtit2_0{>gYX2X3P7$LP($bfis6{G0G4V8IfOulBNtbwA!NpkBZM&R!IE2% 
zLkL^phX^4?R%i$jR}>;rB(}m2X6#$X2&l2d6orWJ1{GGw#TckCgn=cEb6{q|or Date: Tue, 9 Jul 2024 15:32:20 -0400 Subject: [PATCH 08/21] Update memchr dep in make_test_images --- make_test_images/Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/make_test_images/Cargo.toml b/make_test_images/Cargo.toml index 1ae12fa1f..06f53e559 100644 --- a/make_test_images/Cargo.toml +++ b/make_test_images/Cargo.toml @@ -18,7 +18,7 @@ image = { version = "0.24.7", default-features = false, features = [ "jpeg", "png", ] } -memchr = "2.7.1" +memchr = "2.7.4" nom = "7.1.3" regex = "1.5.6" serde = "1.0.197" From f57b83261d107a2ef94ef138d6d6305ddb63a679 Mon Sep 17 00:00:00 2001 From: ok-nick Date: Tue, 9 Jul 2024 16:17:08 -0400 Subject: [PATCH 09/21] Add ZIP unit tests --- sdk/src/asset_handlers/zip_io.rs | 129 +++++++++++++++++++++++++------ 1 file changed, 104 insertions(+), 25 deletions(-) diff --git a/sdk/src/asset_handlers/zip_io.rs b/sdk/src/asset_handlers/zip_io.rs index cff59f566..924d8ade5 100644 --- a/sdk/src/asset_handlers/zip_io.rs +++ b/sdk/src/asset_handlers/zip_io.rs @@ -5,7 +5,11 @@ use std::{ }; use tempfile::Builder; -use zip::{result::ZipResult, write::SimpleFileOptions, CompressionMethod, ZipArchive, ZipWriter}; +use zip::{ + result::{ZipError, ZipResult}, + write::SimpleFileOptions, + CompressionMethod, ZipArchive, ZipWriter, +}; use crate::{ assertions::UriHashedDataMap, @@ -30,17 +34,30 @@ impl CAIWriter for ZipIO { .writer(input_stream, output_stream) .map_err(|_| Error::EmbeddingError)?; - // TODO: what happens if the dir exists? 
- writer - .add_directory("META-INF", SimpleFileOptions::default()) - .map_err(|_| Error::EmbeddingError)?; + match writer.add_directory("META-INF", SimpleFileOptions::default()) { + Err(ZipError::InvalidArchive("Duplicate filename")) => {} + Err(_) => return Err(Error::EmbeddingError), + _ => {} + } + + match writer.start_file_from_path( + Path::new("META-INF/content_credential.c2pa"), + SimpleFileOptions::default().compression_method(CompressionMethod::Stored), + ) { + Err(ZipError::InvalidArchive("Duplicate filename")) => { + writer.abort_file().map_err(|_| Error::EmbeddingError)?; + // TODO: remove code duplication + writer + .start_file_from_path( + Path::new("META-INF/content_credential.c2pa"), + SimpleFileOptions::default().compression_method(CompressionMethod::Stored), + ) + .map_err(|_| Error::EmbeddingError)?; + } + Err(_) => return Err(Error::EmbeddingError), + _ => {} + } - writer - .start_file_from_path( - Path::new("META-INF/content_credential.c2pa"), - SimpleFileOptions::default().compression_method(CompressionMethod::Stored), - ) - .map_err(|_| Error::EmbeddingError)?; io::copy(&mut store_bytes, &mut writer)?; writer.finish().map_err(|_| Error::EmbeddingError)?; @@ -64,12 +81,14 @@ impl CAIWriter for ZipIO { .writer(input_stream, output_stream) .map_err(|_| Error::EmbeddingError)?; - writer - .start_file_from_path( - Path::new("META-INF/content_credential.c2pa"), - SimpleFileOptions::default(), - ) - .map_err(|_| Error::EmbeddingError)?; + match writer.start_file_from_path( + Path::new("META-INF/content_credential.c2pa"), + SimpleFileOptions::default(), + ) { + Err(ZipError::InvalidArchive("Duplicate filename")) => {} + Err(_) => return Err(Error::EmbeddingError), + _ => {} + } writer.abort_file().map_err(|_| Error::EmbeddingError)?; writer.finish().map_err(|_| Error::EmbeddingError)?; @@ -221,15 +240,12 @@ impl ZipIO { input_stream: &'a mut dyn CAIRead, output_stream: &'a mut dyn CAIReadWrite, ) -> ZipResult>> { - let mut writer = 
ZipWriter::new_append(CAIReadWriteWrapper { - reader_writer: output_stream, - })?; + input_stream.rewind()?; + io::copy(input_stream, output_stream)?; - writer.merge_archive(ZipArchive::new(CAIReadWrapper { - reader: input_stream, - })?)?; - - Ok(writer) + ZipWriter::new_append(CAIReadWriteWrapper { + reader_writer: output_stream, + }) } } @@ -302,3 +318,66 @@ where Ok(ranges) } + +#[cfg(test)] +mod tests { + use io::{Cursor, Seek}; + + use super::*; + + const SAMPLE1: &[u8] = include_bytes!("../../tests/fixtures/sample1.zip"); + + #[test] + fn test_write_bytes() -> Result<()> { + let mut stream = Cursor::new(SAMPLE1); + + let zip_io = ZipIO {}; + + assert!(matches!( + zip_io.read_cai(&mut stream), + Err(Error::JumbfNotFound) + )); + + let mut output_stream = Cursor::new(Vec::with_capacity(SAMPLE1.len() + 15 + 7)); + let random_bytes = [1, 2, 3, 4, 3, 2, 1]; + zip_io.write_cai(&mut stream, &mut output_stream, &random_bytes)?; + + let data_written = zip_io.read_cai(&mut output_stream)?; + assert_eq!(data_written, random_bytes); + + Ok(()) + } + + #[test] + fn test_write_bytes_replace() -> Result<()> { + let mut stream = Cursor::new(SAMPLE1); + + let zip_io = ZipIO {}; + + assert!(matches!( + zip_io.read_cai(&mut stream), + Err(Error::JumbfNotFound) + )); + + let mut output_stream1 = Cursor::new(Vec::with_capacity(SAMPLE1.len() + 15 + 7)); + let random_bytes = [1, 2, 3, 4, 3, 2, 1]; + zip_io.write_cai(&mut stream, &mut output_stream1, &random_bytes)?; + + let data_written = zip_io.read_cai(&mut output_stream1)?; + assert_eq!(data_written, random_bytes); + + let mut output_stream2 = Cursor::new(Vec::with_capacity(SAMPLE1.len() + 15 + 5)); + let random_bytes = [3, 2, 1, 2, 3]; + zip_io.write_cai(&mut output_stream1, &mut output_stream2, &random_bytes)?; + + let data_written = zip_io.read_cai(&mut output_stream2)?; + assert_eq!(data_written, random_bytes); + + let mut bytes = Vec::new(); + stream.rewind()?; + stream.read_to_end(&mut bytes)?; + assert_eq!(SAMPLE1, 
bytes); + + Ok(()) + } +} From 770045354d7f5325edaf060167c8b50bc8ef8e57 Mon Sep 17 00:00:00 2001 From: ok-nick Date: Wed, 10 Jul 2024 09:36:09 -0400 Subject: [PATCH 10/21] Collection Hash Assertion relative path validation --- sdk/src/assertions/collection_hash.rs | 27 ++++++++++++++++++++++++--- sdk/src/asset_handlers/zip_io.rs | 6 +++--- 2 files changed, 27 insertions(+), 6 deletions(-) diff --git a/sdk/src/assertions/collection_hash.rs b/sdk/src/assertions/collection_hash.rs index 87a315d5c..c78ce70f3 100644 --- a/sdk/src/assertions/collection_hash.rs +++ b/sdk/src/assertions/collection_hash.rs @@ -1,7 +1,7 @@ use std::{ fs::File, io::{Read, Seek}, - path::{Path, PathBuf}, + path::{Component, Path, PathBuf}, }; use serde::{Deserialize, Serialize}; @@ -52,12 +52,13 @@ impl CollectionHash { self.uris.push(uri_map); } - // TODO: is it safe to assume self.uris includes the stream that's being embedded into? or should - // we pass it as a param? + // The base path MUST be the folder of the manifest. A URI MUST NOT reference a path outside of that folder. 
pub fn gen_hash(&mut self, base_path: &Path) -> Result<()> where R: Read + Seek + ?Sized, { + self.validate_paths()?; + let alg = self.alg().to_owned(); for uri_map in &mut self.uris { let path = base_path.join(&uri_map.uri); @@ -81,6 +82,8 @@ impl CollectionHash { where R: Read + Seek + ?Sized, { + self.validate_paths()?; + let alg = alg.unwrap_or_else(|| self.alg()); for uri_map in &self.uris { let path = base_path.join(&uri_map.uri); @@ -183,4 +186,22 @@ impl CollectionHash { fn alg(&self) -> &str { self.alg.as_deref().unwrap_or("sha256") } + + fn validate_paths(&self) -> Result<()> { + for uri_map in &self.uris { + for component in uri_map.uri.components() { + match component { + Component::CurDir | Component::ParentDir => { + return Err(Error::BadParam(format!( + "URI `{}` must not contain relative components: `.` nor `..`", + uri_map.uri.display() + ))); + } + _ => {} + } + } + } + + Ok(()) + } } diff --git a/sdk/src/asset_handlers/zip_io.rs b/sdk/src/asset_handlers/zip_io.rs index 924d8ade5..3d9260595 100644 --- a/sdk/src/asset_handlers/zip_io.rs +++ b/sdk/src/asset_handlers/zip_io.rs @@ -338,7 +338,7 @@ mod tests { Err(Error::JumbfNotFound) )); - let mut output_stream = Cursor::new(Vec::with_capacity(SAMPLE1.len() + 15 + 7)); + let mut output_stream = Cursor::new(Vec::with_capacity(SAMPLE1.len() + 7)); let random_bytes = [1, 2, 3, 4, 3, 2, 1]; zip_io.write_cai(&mut stream, &mut output_stream, &random_bytes)?; @@ -359,14 +359,14 @@ mod tests { Err(Error::JumbfNotFound) )); - let mut output_stream1 = Cursor::new(Vec::with_capacity(SAMPLE1.len() + 15 + 7)); + let mut output_stream1 = Cursor::new(Vec::with_capacity(SAMPLE1.len() + 7)); let random_bytes = [1, 2, 3, 4, 3, 2, 1]; zip_io.write_cai(&mut stream, &mut output_stream1, &random_bytes)?; let data_written = zip_io.read_cai(&mut output_stream1)?; assert_eq!(data_written, random_bytes); - let mut output_stream2 = Cursor::new(Vec::with_capacity(SAMPLE1.len() + 15 + 5)); + let mut output_stream2 = 
Cursor::new(Vec::with_capacity(SAMPLE1.len() + 5)); let random_bytes = [3, 2, 1, 2, 3]; zip_io.write_cai(&mut output_stream1, &mut output_stream2, &random_bytes)?; From 799481772f61c3db3ad8784fe6507de1d55425e6 Mon Sep 17 00:00:00 2001 From: ok-nick Date: Wed, 10 Jul 2024 12:44:48 -0400 Subject: [PATCH 11/21] Add collection hash unit tests --- sdk/src/assertions/collection_hash.rs | 95 ++++++++++++++++++++++++-- sdk/tests/fixtures/sample1.zip | Bin 2934 -> 1096 bytes 2 files changed, 88 insertions(+), 7 deletions(-) diff --git a/sdk/src/assertions/collection_hash.rs b/sdk/src/assertions/collection_hash.rs index c78ce70f3..84a6e712e 100644 --- a/sdk/src/assertions/collection_hash.rs +++ b/sdk/src/assertions/collection_hash.rs @@ -11,7 +11,7 @@ use crate::{ hash_utils::verify_stream_by_alg, Error, HashRange, Result, }; -#[derive(Serialize, Deserialize, Debug, PartialEq, Eq)] +#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Default)] pub struct CollectionHash { pub uris: Vec, @@ -40,12 +40,8 @@ pub struct UriHashedDataMap { } impl CollectionHash { - pub fn new(alg: String) -> Self { - CollectionHash { - uris: Vec::new(), - alg: Some(alg), - zip_central_directory_hash: None, - } + pub fn new() -> Self { + Self::default() } pub fn add_uri_map(&mut self, uri_map: UriHashedDataMap) { @@ -205,3 +201,88 @@ impl CollectionHash { Ok(()) } } + +#[cfg(test)] +mod tests { + use std::io::Cursor; + + use super::*; + + const ZIP_SAMPLE1: &[u8] = include_bytes!("../../tests/fixtures/sample1.zip"); + + #[test] + fn test_zip_uri_gen() -> Result<()> { + let mut stream = Cursor::new(ZIP_SAMPLE1); + + let mut collection = CollectionHash::new(); + collection.gen_uris_from_zip_stream(&mut stream)?; + + assert_eq!( + collection.uris.first(), + Some(&UriHashedDataMap { + uri: PathBuf::from("sample1/test1.txt"), + hash: Vec::new(), + size: Some(44), + dc_format: None, + data_types: None + }) + ); + assert_eq!( + collection.uris.get(1), + Some(&UriHashedDataMap { + uri: 
PathBuf::from("sample1/test1/test1.txt"), + hash: Vec::new(), + size: Some(87), + dc_format: None, + data_types: None + }) + ); + assert_eq!( + collection.uris.get(2), + Some(&UriHashedDataMap { + uri: PathBuf::from("sample1/test1/test2.txt"), + hash: Vec::new(), + size: Some(148), + dc_format: None, + data_types: None + }) + ); + assert_eq!( + collection.uris.get(3), + Some(&UriHashedDataMap { + uri: PathBuf::from("sample1/test1/test3.txt"), + hash: Vec::new(), + size: Some(186), + dc_format: None, + data_types: None + }) + ); + assert_eq!( + collection.uris.get(4), + Some(&UriHashedDataMap { + uri: PathBuf::from("sample1/test2.txt"), + hash: Vec::new(), + size: Some(304), + dc_format: None, + data_types: None + }) + ); + assert_eq!(collection.uris.len(), 5); + + Ok(()) + } + + #[test] + fn test_zip_hash_gen() -> Result<()> { + let mut stream = Cursor::new(ZIP_SAMPLE1); + + // TODO: blocked by zip_io::central_directory_inclusions + // let mut collection = CollectionHash::new(); + // collection.gen_uris_from_zip_stream(&mut stream)?; + // collection.gen_hash_from_zip_stream(&mut stream)?; + + // TODO: assert central dir hash + uri map hashes + + Ok(()) + } +} diff --git a/sdk/tests/fixtures/sample1.zip b/sdk/tests/fixtures/sample1.zip index 29dec9efab522515c1df0df5b65f4b12f2b2a852..18c5dd36e60e4ed024568c3128c220b72fe426f7 100644 GIT binary patch literal 1096 zcmWIWW@Zs#W&ncL>X#898V2})jN-)Hf}B)C{gTw;5<~p}pjs}l+5|MUg6L}XN-9ba z%8P)?dFtwKumF{TusFJMhy@@`Ae#^xGtlh8uF(k9Hjqb{zdEw>18o6e?3#>Wwkd$D zgb*N$A%W3}W-)qT7$F5lIZ*9g<(6ffK)XN~Jutw|gIf~d&B$cWj4Q}hfcil|py90} zh(?V7E(QgVAOjeT4XDt(XBv4G>Szn%XyJ4z-AFiiSB)zzGZC1y(j-ATY24;Xx(_hC-l03=9DOg!?K0 literal 2934 zcmWIWW@Zs#0D+e3ml0qFlwe~}U?@o~E-};(4dG>A_w4UY1K~&@F0J5ZU}X8u$iM*B z8vr(ggMovg2+0fH^Y+1_p6VmmKVbxtRkG7^f6kq66?b2LJG++Mx9+-`yS&bcUzN4tk4qP?)nAvliDPs8 zmgA2CYnJg`!Et$T%}dS{PvqAQB34dgcd8qoze;xZ$8g z4N8k38qIJ}S`5@^6b5^N`Ku#4C)gb1)J_+#i2Voae2U-u*1>v&i?g zd$U==QOO-S-ysX~K}vUHQoab7$pJG=Y7uaa%|MsrRE5=}9v;U)6Sw5-d 
zd&2Kq?|mS9-MH5O>$)AiX8+eJH|5Jt6q`P4%CAS|f5nfT){Z;3by^m5y8!gNjiYSkmYMHxql6i=~1O@J2NpxoQBFo-nYaaXFAle$@`Lhl?3^MT4-X z7?|QA0Rtit2_0{>gYX2X3P7$LP($bfis6{G0G4V8IfOulBNtbwA!NpkBZM&R!IE2% zLkL^phX^4?R%i$jR}>;rB(}m2X6#$X2&l2d6orWJ1{GGw#TckCgn=cEb6{q|or Date: Wed, 10 Jul 2024 13:13:08 -0400 Subject: [PATCH 12/21] Pass CI for collection hash --- sdk/src/assertions/collection_hash.rs | 2 +- sdk/src/asset_handlers/zip_io.rs | 4 +--- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/sdk/src/assertions/collection_hash.rs b/sdk/src/assertions/collection_hash.rs index 84a6e712e..3bf26caf5 100644 --- a/sdk/src/assertions/collection_hash.rs +++ b/sdk/src/assertions/collection_hash.rs @@ -274,7 +274,7 @@ mod tests { #[test] fn test_zip_hash_gen() -> Result<()> { - let mut stream = Cursor::new(ZIP_SAMPLE1); + // let mut stream = Cursor::new(ZIP_SAMPLE1); // TODO: blocked by zip_io::central_directory_inclusions // let mut collection = CollectionHash::new(); diff --git a/sdk/src/asset_handlers/zip_io.rs b/sdk/src/asset_handlers/zip_io.rs index 3d9260595..d185bf9b3 100644 --- a/sdk/src/asset_handlers/zip_io.rs +++ b/sdk/src/asset_handlers/zip_io.rs @@ -256,9 +256,7 @@ where { let _reader = ZipArchive::new(reader).map_err(|_| Error::JumbfNotFound)?; - // TODO: https://github.com/zip-rs/zip2/pull/71 - // or - // https://gitlab.com/xMAC94x/zip-core (https://github.com/zip-rs/zip2/issues/204) + // TODO: https://github.com/zip-rs/zip2/issues/209 todo!() } From 32fc67487ceb0a6c7b84751af36e16b1711afcff Mon Sep 17 00:00:00 2001 From: ok-nick Date: Wed, 10 Jul 2024 13:28:28 -0400 Subject: [PATCH 13/21] Fix ZIP offsets/lens --- sdk/src/assertions/collection_hash.rs | 10 +++++----- sdk/src/asset_handlers/zip_io.rs | 13 ++++++------- 2 files changed, 11 insertions(+), 12 deletions(-) diff --git a/sdk/src/assertions/collection_hash.rs b/sdk/src/assertions/collection_hash.rs index 3bf26caf5..44f0cf76e 100644 --- a/sdk/src/assertions/collection_hash.rs +++ 
b/sdk/src/assertions/collection_hash.rs @@ -222,7 +222,7 @@ mod tests { Some(&UriHashedDataMap { uri: PathBuf::from("sample1/test1.txt"), hash: Vec::new(), - size: Some(44), + size: Some(47), dc_format: None, data_types: None }) @@ -232,7 +232,7 @@ mod tests { Some(&UriHashedDataMap { uri: PathBuf::from("sample1/test1/test1.txt"), hash: Vec::new(), - size: Some(87), + size: Some(57), dc_format: None, data_types: None }) @@ -242,7 +242,7 @@ mod tests { Some(&UriHashedDataMap { uri: PathBuf::from("sample1/test1/test2.txt"), hash: Vec::new(), - size: Some(148), + size: Some(53), dc_format: None, data_types: None }) @@ -252,7 +252,7 @@ mod tests { Some(&UriHashedDataMap { uri: PathBuf::from("sample1/test1/test3.txt"), hash: Vec::new(), - size: Some(186), + size: Some(68), dc_format: None, data_types: None }) @@ -262,7 +262,7 @@ mod tests { Some(&UriHashedDataMap { uri: PathBuf::from("sample1/test2.txt"), hash: Vec::new(), - size: Some(304), + size: Some(56), dc_format: None, data_types: None }) diff --git a/sdk/src/asset_handlers/zip_io.rs b/sdk/src/asset_handlers/zip_io.rs index d185bf9b3..5588c14e4 100644 --- a/sdk/src/asset_handlers/zip_io.rs +++ b/sdk/src/asset_handlers/zip_io.rs @@ -280,10 +280,9 @@ where #[allow(clippy::unwrap_used)] uri: file.enclosed_name().unwrap(), hash: Vec::new(), - // TODO: same here - size: Some(file.header_start() - file.compressed_size()), - dc_format: None, // TODO - data_types: None, // TODO + size: Some((file.data_start() + file.compressed_size()) - file.header_start()), + dc_format: None, + data_types: None, }); } } @@ -305,11 +304,11 @@ where let file = reader.by_index(index).map_err(|_| Error::JumbfNotFound)?; if !file.is_dir() { - // TODO: hash from header or data? does compressed_size include header? 
- // and fix error type + // TODO: fix error type ranges.push(HashRange::new( usize::try_from(file.header_start()).map_err(|_| Error::JumbfNotFound)?, - usize::try_from(file.compressed_size()).map_err(|_| Error::JumbfNotFound)?, + usize::try_from((file.data_start() + file.compressed_size()) - file.header_start()) + .map_err(|_| Error::JumbfNotFound)?, )); } } From d66888834cbf360a0b121eac2a5b504a74e4e1b5 Mon Sep 17 00:00:00 2001 From: ok-nick Date: Thu, 11 Jul 2024 16:43:21 -0400 Subject: [PATCH 14/21] Collection assertion docs, optimizations, and cleanup --- sdk/src/assertions/collection_hash.rs | 515 ++++++++++++++++++-------- sdk/src/assertions/labels.rs | 5 + sdk/src/asset_handlers/zip_io.rs | 72 +--- 3 files changed, 368 insertions(+), 224 deletions(-) diff --git a/sdk/src/assertions/collection_hash.rs b/sdk/src/assertions/collection_hash.rs index 44f0cf76e..bab00d317 100644 --- a/sdk/src/assertions/collection_hash.rs +++ b/sdk/src/assertions/collection_hash.rs @@ -1,142 +1,229 @@ use std::{ - fs::File, + fs::{self, File}, io::{Read, Seek}, path::{Component, Path, PathBuf}, }; use serde::{Deserialize, Serialize}; +use zip::ZipArchive; use crate::{ - assertions::AssetType, asset_handlers::zip_io, hash_stream_by_alg, - hash_utils::verify_stream_by_alg, Error, HashRange, Result, + assertion::{Assertion, AssertionBase, AssertionCbor}, + assertions::{labels::COLLECTION_HASH, AssetType}, + hash_stream_by_alg, + hash_utils::verify_stream_by_alg, + Error, HashRange, Result, }; -#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Default)] +// TODO: which version? +const ASSERTION_CREATION_VERSION: usize = 2; + +/// A collection hash is used to hash multiple files within a collection (e.g. a folder or a zip file). +#[derive(Serialize, Deserialize, Debug, PartialEq, Eq)] pub struct CollectionHash { + /// List of files and their metadata to include in the collection hash. pub uris: Vec, + /// Algorithm used to hash the files. 
#[serde(skip_serializing_if = "Option::is_none")] pub alg: Option, - #[serde(skip_serializing_if = "Option::is_none", with = "serde_bytes")] - pub zip_central_directory_hash: Option>, + // Although this isn't explicitly defined in the spec, users MUST specify a base path when constructing + // a collection hash. You may notice that zips do not require this field, so we can make it optional, + // but that would mean users can optionally specify it, which isn't true. + // + /// This field represents the root directory where files must be contained within. If the path is a file, it + /// will default to using the file's parent. For more information, read [`CollectionHash::new`][CollectionHash::new]. + pub base_path: PathBuf, + + // The user would never need to explicitly specify this field, it's always recomputed internally. + #[serde(with = "serde_bytes", skip_serializing_if = "Option::is_none")] + zip_central_directory_hash: Option>, + + #[serde(skip)] + zip_central_directory_hash_range: Option, } +/// Information about a file in a [`CollectionHash`][CollectionHash]. #[derive(Serialize, Deserialize, Debug, PartialEq, Eq)] pub struct UriHashedDataMap { + /// Path to the file included in the collection. pub uri: PathBuf, - #[serde(with = "serde_bytes")] - pub hash: Vec, + // Same as zip_central_directory_hash, this field is always recomputed, users would never need to specify it + // explicitly. + #[serde(with = "serde_bytes", skip_serializing_if = "Option::is_none")] + hash: Option>, + /// Size of the file in the collection. #[serde(skip_serializing_if = "Option::is_none")] pub size: Option, + /// Mime type of the file. + /// + /// Note that this field is specified as `dc:format` during serialization/deserialization. #[serde(rename = "dc:format", skip_serializing_if = "Option::is_none")] pub dc_format: Option, + /// Additional information about the type of data in the file. 
#[serde(skip_serializing_if = "Option::is_none")] pub data_types: Option>, + + #[serde(skip)] + zip_hash_range: Option, } impl CollectionHash { - pub fn new() -> Self { - Self::default() + pub const LABEL: &'static str = COLLECTION_HASH; + + /// Create a new collection hash with the specified base path. + /// + /// A base path means that any path added to the collection will use the base path as the root. If the + /// added path is outside the scope of the base path, hashing will immediately result in an error. + /// + /// The base path may either be a file or a directory. However, if it s a file, it will use the parent + /// directory as the root. + pub fn new(base_path: PathBuf) -> Self { + Self { + uris: Vec::new(), + alg: None, + // TODO: if base_path is a file, then do .parent() or error? + base_path, + zip_central_directory_hash: None, + zip_central_directory_hash_range: None, + } + } + + /// Create a new collection hash with the specified algorithm. + /// + /// For more details on base_path, read [`CollectionHash::new`][CollectionHash::new]. + pub fn with_alg(base_path: PathBuf, alg: String) -> Self { + Self { + uris: Vec::new(), + alg: Some(alg), + base_path, + zip_central_directory_hash: None, + zip_central_directory_hash_range: None, + } } - pub fn add_uri_map(&mut self, uri_map: UriHashedDataMap) { - self.uris.push(uri_map); + /// Adds a new file to the collection hash. + /// + /// Note that the specified path MUST be a file, not a directory. It must also be within the scope of the + /// base_path. Read more on base_path in [`CollectionHash::new`][CollectionHash::new]. + pub fn add_file(&mut self, path: PathBuf) -> Result<()> { + self.add_file_raw(path, None) } - // The base path MUST be the folder of the manifest. A URI MUST NOT reference a path outside of that folder. - pub fn gen_hash(&mut self, base_path: &Path) -> Result<()> + /// Add a file with the specified data types. 
+ /// + /// Read more on the constraints of these parameters in [`CollectionHash::add_file`][CollectionHash::add_file]. + pub fn add_file_with_data_types( + &mut self, + path: PathBuf, + data_types: Vec, + ) -> Result<()> { + self.add_file_raw(path, Some(data_types)) + } + + /// Generate the hashes for the files in the collection. + pub fn gen_hash(&mut self) -> Result<()> where R: Read + Seek + ?Sized, { - self.validate_paths()?; - let alg = self.alg().to_owned(); for uri_map in &mut self.uris { - let path = base_path.join(&uri_map.uri); - let mut file = File::open(path)?; - let file_len = file.metadata()?.len(); + let path = &uri_map.uri; + Self::validate_path(path)?; - uri_map.hash = hash_stream_by_alg( + let mut file = File::open(path)?; + let file_len = match uri_map.size { + Some(file_len) => file_len, + None => file.metadata()?.len(), + }; + uri_map.hash = Some(hash_stream_by_alg( &alg, &mut file, // TODO: temp unwrap #[allow(clippy::unwrap_used)] Some(vec![HashRange::new(0, usize::try_from(file_len).unwrap())]), false, - )?; + )?); } Ok(()) } - pub fn verify_hash(&self, alg: Option<&str>, base_path: &Path) -> Result<()> + /// Validate the hashes for the files in the collection. 
+ pub fn verify_hash(&self, alg: Option<&str>) -> Result<()> where R: Read + Seek + ?Sized, { - self.validate_paths()?; - let alg = alg.unwrap_or_else(|| self.alg()); for uri_map in &self.uris { - let path = base_path.join(&uri_map.uri); - let mut file = File::open(&path)?; + let path = &uri_map.uri; + Self::validate_path(path)?; + + let mut file = File::open(path)?; let file_len = file.metadata()?.len(); - if !verify_stream_by_alg( - alg, - &uri_map.hash, - &mut file, - // TODO: temp unwrap - #[allow(clippy::unwrap_used)] - Some(vec![HashRange::new(0, usize::try_from(file_len).unwrap())]), - false, - ) { - return Err(Error::HashMismatch(format!( - "hash for {} does not match", - path.display() - ))); + match &uri_map.hash { + Some(hash) => { + if !verify_stream_by_alg( + alg, + hash, + &mut file, + // TODO: temp unwrap + #[allow(clippy::unwrap_used)] + Some(vec![HashRange::new(0, usize::try_from(file_len).unwrap())]), + false, + ) { + return Err(Error::HashMismatch(format!( + "hash for {} does not match", + path.display() + ))); + } + } + None => todo!(), } } Ok(()) } - // We overwrite all URIs with all existing URIs in the ZIP because all URIs in the ZIP represent all - // possible valid URIs — we don't want duplicates! 
- pub fn gen_uris_from_zip_stream(&mut self, stream: &mut R) -> Result<()> - where - R: Read + Seek + ?Sized, - { - self.uris = zip_io::uri_maps(stream)?; - Ok(()) - } - pub fn gen_hash_from_zip_stream(&mut self, stream: &mut R) -> Result<()> where R: Read + Seek + ?Sized, { let alg = self.alg().to_owned(); - let zip_central_directory_inclusions = zip_io::central_directory_inclusions(stream)?; - let zip_central_directory_hash = - hash_stream_by_alg(&alg, stream, Some(zip_central_directory_inclusions), false)?; + let zip_central_directory_inclusions = zip_central_directory_range(stream)?; + let zip_central_directory_hash = hash_stream_by_alg( + &alg, + stream, + Some(vec![zip_central_directory_inclusions]), + false, + )?; if zip_central_directory_hash.is_empty() { return Err(Error::BadParam("could not generate data hash".to_string())); } self.zip_central_directory_hash = Some(zip_central_directory_hash); - let hash_ranges = zip_io::uri_inclusions(stream, &self.uris)?; - for (uri_map, hash_range) in self.uris.iter_mut().zip(hash_ranges) { - let hash = hash_stream_by_alg(&alg, stream, Some(vec![hash_range]), false)?; + self.uris = zip_uri_ranges(stream)?; + for uri_map in &mut self.uris { + let hash = hash_stream_by_alg( + &alg, + stream, + // We always generate the zip_hash_range in zip_uri_ranges. 
+ #[allow(clippy::unwrap_used)] + Some(vec![uri_map.zip_hash_range.clone().unwrap()]), + false, + )?; if hash.is_empty() { return Err(Error::BadParam("could not generate data hash".to_string())); } - uri_map.hash = hash; + uri_map.hash = Some(hash); } Ok(()) @@ -147,18 +234,19 @@ impl CollectionHash { R: Read + Seek + ?Sized, { let alg = alg.unwrap_or_else(|| self.alg()); - let central_directory_hash = match &self.zip_central_directory_hash { + let zip_central_directory_hash = match &self.zip_central_directory_hash { Some(hash) => Ok(hash), None => Err(Error::BadParam( "Missing zip central directory hash".to_owned(), )), }?; - let zip_central_directory_inclusions = zip_io::central_directory_inclusions(stream)?; if !verify_stream_by_alg( alg, - central_directory_hash, + zip_central_directory_hash, stream, - Some(zip_central_directory_inclusions), + // If zip_central_directory_hash exists (we checked above), then this must exist. + #[allow(clippy::unwrap_used)] + Some(vec![self.zip_central_directory_hash_range.clone().unwrap()]), false, ) { return Err(Error::HashMismatch( @@ -166,35 +254,70 @@ impl CollectionHash { )); } - let hash_ranges = zip_io::uri_inclusions(stream, &self.uris)?; - for (uri_map, hash_range) in self.uris.iter().zip(hash_ranges) { - if !verify_stream_by_alg(alg, &uri_map.hash, stream, Some(vec![hash_range]), false) { - return Err(Error::HashMismatch(format!( - "hash for {} does not match", - uri_map.uri.display() - ))); + for uri_map in &self.uris { + match &uri_map.hash { + Some(hash) => { + if !verify_stream_by_alg( + alg, + hash, + stream, + // Same reason as above. 
+ #[allow(clippy::unwrap_used)] + Some(vec![uri_map.zip_hash_range.clone().unwrap()]), + false, + ) { + return Err(Error::HashMismatch(format!( + "hash for {} does not match", + uri_map.uri.display() + ))); + } + } + None => todo!(), } } Ok(()) } + fn add_file_raw(&mut self, path: PathBuf, data_types: Option>) -> Result<()> { + // TODO: how should we handle if the path already exists in the collection? + Self::validate_path(&path)?; + + let format = crate::format_from_path(&path); + let metadata = fs::metadata(&path)?; + self.uris.push(UriHashedDataMap { + uri: self.base_path.join(path), + hash: None, + size: Some(metadata.len()), + dc_format: format, + data_types, + zip_hash_range: None, + }); + + Ok(()) + } + fn alg(&self) -> &str { self.alg.as_deref().unwrap_or("sha256") } - fn validate_paths(&self) -> Result<()> { - for uri_map in &self.uris { - for component in uri_map.uri.components() { - match component { - Component::CurDir | Component::ParentDir => { - return Err(Error::BadParam(format!( - "URI `{}` must not contain relative components: `.` nor `..`", - uri_map.uri.display() - ))); - } - _ => {} + fn validate_path(path: &Path) -> Result<()> { + if !path.is_file() { + return Err(Error::BadParam(format!( + "Collection hashes must only contain files; got `{}`", + path.display() + ))); + } + + for component in path.components() { + match component { + Component::CurDir | Component::ParentDir => { + return Err(Error::BadParam(format!( + "URI `{}` must not contain relative components: `.` nor `..`", + path.display() + ))); } + _ => {} } } @@ -202,87 +325,171 @@ impl CollectionHash { } } -#[cfg(test)] -mod tests { - use std::io::Cursor; - - use super::*; - - const ZIP_SAMPLE1: &[u8] = include_bytes!("../../tests/fixtures/sample1.zip"); - - #[test] - fn test_zip_uri_gen() -> Result<()> { - let mut stream = Cursor::new(ZIP_SAMPLE1); - - let mut collection = CollectionHash::new(); - collection.gen_uris_from_zip_stream(&mut stream)?; - - assert_eq!( - 
collection.uris.first(), - Some(&UriHashedDataMap { - uri: PathBuf::from("sample1/test1.txt"), - hash: Vec::new(), - size: Some(47), - dc_format: None, - data_types: None - }) - ); - assert_eq!( - collection.uris.get(1), - Some(&UriHashedDataMap { - uri: PathBuf::from("sample1/test1/test1.txt"), - hash: Vec::new(), - size: Some(57), - dc_format: None, - data_types: None - }) - ); - assert_eq!( - collection.uris.get(2), - Some(&UriHashedDataMap { - uri: PathBuf::from("sample1/test1/test2.txt"), - hash: Vec::new(), - size: Some(53), - dc_format: None, - data_types: None - }) - ); - assert_eq!( - collection.uris.get(3), - Some(&UriHashedDataMap { - uri: PathBuf::from("sample1/test1/test3.txt"), - hash: Vec::new(), - size: Some(68), - dc_format: None, - data_types: None - }) - ); - assert_eq!( - collection.uris.get(4), - Some(&UriHashedDataMap { - uri: PathBuf::from("sample1/test2.txt"), - hash: Vec::new(), - size: Some(56), - dc_format: None, - data_types: None - }) - ); - assert_eq!(collection.uris.len(), 5); +impl AssertionBase for CollectionHash { + const LABEL: &'static str = Self::LABEL; + const VERSION: Option = Some(ASSERTION_CREATION_VERSION); - Ok(()) + fn from_assertion(assertion: &Assertion) -> Result { + Self::from_cbor_assertion(assertion) } - #[test] - fn test_zip_hash_gen() -> Result<()> { - // let mut stream = Cursor::new(ZIP_SAMPLE1); + // We don't need to check if the zip_central_directory_hash exists, because if it is a zip + // and one of the uri maps hashes don't exist, then that means the central dir hash doesn't exist. 
+ fn to_assertion(&self) -> Result { + if self.uris.iter().any(|uri_map| uri_map.hash.is_none()) { + return Err(Error::BadParam( + "No hash found, ensure gen_hash is called".to_string(), + )); + } - // TODO: blocked by zip_io::central_directory_inclusions - // let mut collection = CollectionHash::new(); - // collection.gen_uris_from_zip_stream(&mut stream)?; - // collection.gen_hash_from_zip_stream(&mut stream)?; + Self::to_cbor_assertion(self) + } +} - // TODO: assert central dir hash + uri map hashes +impl AssertionCbor for CollectionHash {} - Ok(()) +pub fn zip_central_directory_range(reader: &mut R) -> Result +where + R: Read + Seek + ?Sized, +{ + let _reader = ZipArchive::new(reader).map_err(|_| Error::JumbfNotFound)?; + + // TODO: https://github.com/zip-rs/zip2/issues/209 + + todo!() +} + +pub fn zip_uri_ranges(stream: &mut R) -> Result> +where + R: Read + Seek + ?Sized, +{ + let mut reader = ZipArchive::new(stream).map_err(|_| Error::JumbfNotFound)?; + + let mut uri_maps = Vec::new(); + let file_names: Vec = reader.file_names().map(|n| n.to_owned()).collect(); + for file_name in file_names { + let file = reader + .by_name(&file_name) + .map_err(|_| Error::JumbfNotFound)?; + + if !file.is_dir() { + match file.enclosed_name() { + Some(path) => { + if path != Path::new("META-INF/content_credential.c2pa") { + uri_maps.push(UriHashedDataMap { + dc_format: crate::format_from_path(&path), + uri: path, + hash: Some(Vec::new()), + size: Some( + (file.data_start() + file.compressed_size()) - file.header_start(), + ), + data_types: None, + // TODO: fix error types + zip_hash_range: Some(HashRange::new( + usize::try_from(file.header_start()) + .map_err(|_| Error::JumbfNotFound)?, + usize::try_from( + (file.data_start() + file.compressed_size()) + - file.header_start(), + ) + .map_err(|_| Error::JumbfNotFound)?, + )), + }); + } + } + None => todo!(), + } + } } + + Ok(uri_maps) } + +// TODO: blocked by central_directory_inclusions +// #[cfg(test)] +// mod tests { +// use 
std::io::Cursor; + +// use super::*; + +// const ZIP_SAMPLE1: &[u8] = include_bytes!("../../tests/fixtures/sample1.zip"); + +// #[test] +// fn test_zip_hash() -> Result<()> { +// let mut stream = Cursor::new(ZIP_SAMPLE1); + +// let mut collection = CollectionHash { +// uris: Vec::new(), +// alg: None, +// zip_central_directory_hash: None, +// base_path: PathBuf::new(), +// zip_central_directory_hash_range: None, +// }; +// collection.gen_hash_from_zip_stream(&mut stream)?; + +// assert_eq!(collection.zip_central_directory_hash, vec![0]); +// assert_eq!( +// collection.zip_central_directory_hash_range, +// Some(HashRange::new(0, 0)) +// ); + +// assert_eq!( +// collection.uris.first(), +// Some(&UriHashedDataMap { +// uri: PathBuf::from("sample1/test1.txt"), +// hash: Some(vec![0]), +// size: Some(47), +// dc_format: None, +// data_types: None, +// zip_hash_range: None, +// }) +// ); +// assert_eq!( +// collection.uris.get(1), +// Some(&UriHashedDataMap { +// uri: PathBuf::from("sample1/test1/test1.txt"), +// hash: Some(vec![0]), +// size: Some(57), +// dc_format: None, +// data_types: None, +// zip_hash_range: None, +// }) +// ); +// assert_eq!( +// collection.uris.get(2), +// Some(&UriHashedDataMap { +// uri: PathBuf::from("sample1/test1/test2.txt"), +// hash: Some(vec![0]), +// size: Some(53), +// dc_format: None, +// data_types: None, +// zip_hash_range: None, +// }) +// ); +// assert_eq!( +// collection.uris.get(3), +// Some(&UriHashedDataMap { +// uri: PathBuf::from("sample1/test1/test3.txt"), +// hash: Some(vec![0]), +// size: Some(68), +// dc_format: None, +// data_types: None, +// zip_hash_range: None, +// }) +// ); +// assert_eq!( +// collection.uris.get(4), +// Some(&UriHashedDataMap { +// uri: PathBuf::from("sample1/test2.txt"), +// hash: Some(vec![0]), +// size: Some(56), +// dc_format: None, +// data_types: None, +// zip_hash_range: None, +// }) +// ); +// assert_eq!(collection.uris.len(), 5); + +// Ok(()) +// } +// } diff --git 
a/sdk/src/assertions/labels.rs b/sdk/src/assertions/labels.rs index c4c5990cb..c11b8cee8 100644 --- a/sdk/src/assertions/labels.rs +++ b/sdk/src/assertions/labels.rs @@ -39,6 +39,11 @@ pub const DATA_HASH: &str = "c2pa.hash.data"; /// See . pub const BOX_HASH: &str = "c2pa.hash.boxes"; +/// Label prefix for a collection hash assertion. +/// +/// See . +pub const COLLECTION_HASH: &str = "c2pa.hash.collection.data"; + /// Label prefix for a BMFF-based hash assertion. /// /// See . diff --git a/sdk/src/asset_handlers/zip_io.rs b/sdk/src/asset_handlers/zip_io.rs index 5588c14e4..8e27e921e 100644 --- a/sdk/src/asset_handlers/zip_io.rs +++ b/sdk/src/asset_handlers/zip_io.rs @@ -1,6 +1,6 @@ use std::{ fs::{self, File}, - io::{self, Read, Seek}, + io::{self, Read}, path::Path, }; @@ -12,13 +12,12 @@ use zip::{ }; use crate::{ - assertions::UriHashedDataMap, asset_io::{ self, AssetIO, CAIReadWrapper, CAIReadWriteWrapper, CAIReader, CAIWriter, HashObjectPositions, }, error::Result, - CAIRead, CAIReadWrite, Error, HashRange, + CAIRead, CAIReadWrite, Error, }; pub struct ZipIO {} @@ -249,73 +248,6 @@ impl ZipIO { } } -// TODO: probably doesn't need to return a vec -pub fn central_directory_inclusions(reader: &mut R) -> Result> -where - R: Read + Seek + ?Sized, -{ - let _reader = ZipArchive::new(reader).map_err(|_| Error::JumbfNotFound)?; - - // TODO: https://github.com/zip-rs/zip2/issues/209 - - todo!() -} - -pub fn uri_maps(stream: &mut R) -> Result> -where - R: Read + Seek + ?Sized, -{ - let mut reader = ZipArchive::new(stream).map_err(|_| Error::JumbfNotFound)?; - - let mut uri_maps = Vec::new(); - let file_names: Vec = reader.file_names().map(|n| n.to_owned()).collect(); - for file_name in file_names { - let file = reader - .by_name(&file_name) - .map_err(|_| Error::JumbfNotFound)?; - - if !file.is_dir() { - uri_maps.push(UriHashedDataMap { - // TODO: temp unwrap - #[allow(clippy::unwrap_used)] - uri: file.enclosed_name().unwrap(), - hash: Vec::new(), - size: 
Some((file.data_start() + file.compressed_size()) - file.header_start()), - dc_format: None, - data_types: None, - }); - } - } - - Ok(uri_maps) -} - -pub fn uri_inclusions(stream: &mut R, uri_maps: &[UriHashedDataMap]) -> Result> -where - R: Read + Seek + ?Sized, -{ - let mut reader = ZipArchive::new(stream).map_err(|_| Error::JumbfNotFound)?; - - let mut ranges = Vec::new(); - for uri_map in uri_maps { - let index = reader - .index_for_path(&uri_map.uri) - .ok_or(Error::JumbfNotFound)?; - let file = reader.by_index(index).map_err(|_| Error::JumbfNotFound)?; - - if !file.is_dir() { - // TODO: fix error type - ranges.push(HashRange::new( - usize::try_from(file.header_start()).map_err(|_| Error::JumbfNotFound)?, - usize::try_from((file.data_start() + file.compressed_size()) - file.header_start()) - .map_err(|_| Error::JumbfNotFound)?, - )); - } - } - - Ok(ranges) -} - #[cfg(test)] mod tests { use io::{Cursor, Seek}; From 11bef802ae365959286f1c241c232841b3873def Mon Sep 17 00:00:00 2001 From: ok-nick Date: Fri, 12 Jul 2024 09:28:08 -0400 Subject: [PATCH 15/21] Cleanup collection hash errors --- sdk/src/assertions/collection_hash.rs | 102 ++++++++++++++++---------- 1 file changed, 65 insertions(+), 37 deletions(-) diff --git a/sdk/src/assertions/collection_hash.rs b/sdk/src/assertions/collection_hash.rs index bab00d317..03148007f 100644 --- a/sdk/src/assertions/collection_hash.rs +++ b/sdk/src/assertions/collection_hash.rs @@ -83,28 +83,15 @@ impl CollectionHash { /// /// The base path may either be a file or a directory. However, if it s a file, it will use the parent /// directory as the root. - pub fn new(base_path: PathBuf) -> Self { - Self { - uris: Vec::new(), - alg: None, - // TODO: if base_path is a file, then do .parent() or error? 
- base_path, - zip_central_directory_hash: None, - zip_central_directory_hash_range: None, - } + pub fn new(base_path: PathBuf) -> Result { + Self::new_raw(base_path, None) } /// Create a new collection hash with the specified algorithm. /// /// For more details on base_path, read [`CollectionHash::new`][CollectionHash::new]. - pub fn with_alg(base_path: PathBuf, alg: String) -> Self { - Self { - uris: Vec::new(), - alg: Some(alg), - base_path, - zip_central_directory_hash: None, - zip_central_directory_hash_range: None, - } + pub fn with_alg(base_path: PathBuf, alg: String) -> Result { + Self::new_raw(base_path, Some(alg)) } /// Adds a new file to the collection hash. @@ -144,9 +131,12 @@ impl CollectionHash { uri_map.hash = Some(hash_stream_by_alg( &alg, &mut file, - // TODO: temp unwrap - #[allow(clippy::unwrap_used)] - Some(vec![HashRange::new(0, usize::try_from(file_len).unwrap())]), + Some(vec![HashRange::new( + 0, + usize::try_from(file_len).map_err(|_| { + Error::BadParam(format!("Value {} out of usize range", file_len)) + })?, + )]), false, )?); } @@ -173,9 +163,12 @@ impl CollectionHash { alg, hash, &mut file, - // TODO: temp unwrap - #[allow(clippy::unwrap_used)] - Some(vec![HashRange::new(0, usize::try_from(file_len).unwrap())]), + Some(vec![HashRange::new( + 0, + usize::try_from(file_len).map_err(|_| { + Error::BadParam(format!("Value {} out of usize range", file_len)) + })?, + )]), false, ) { return Err(Error::HashMismatch(format!( @@ -184,7 +177,11 @@ impl CollectionHash { ))); } } - None => todo!(), + None => { + return Err(Error::BadParam( + "Must generate hashes before verifying".to_owned(), + )); + } } } @@ -272,13 +269,40 @@ impl CollectionHash { ))); } } - None => todo!(), + None => { + return Err(Error::BadParam( + "Must generate hashes before verifying".to_owned(), + )); + } } } Ok(()) } + fn new_raw(base_path: PathBuf, alg: Option) -> Result { + let base_path = match base_path.is_file() { + true => match base_path.parent() { + Some(path) => 
path.to_path_buf(), + None => { + return Err(Error::BadParam( + "Base path must be a directory or a file with a parent directory" + .to_owned(), + )) + } + }, + false => base_path, + }; + + Ok(Self { + uris: Vec::new(), + alg, + base_path, + zip_central_directory_hash: None, + zip_central_directory_hash_range: None, + }) + } + fn add_file_raw(&mut self, path: PathBuf, data_types: Option>) -> Result<()> { // TODO: how should we handle if the path already exists in the collection? Self::validate_path(&path)?; @@ -376,28 +400,32 @@ where match file.enclosed_name() { Some(path) => { if path != Path::new("META-INF/content_credential.c2pa") { + let start = file.header_start(); + let len = + (file.data_start() + file.compressed_size()) - file.header_start(); uri_maps.push(UriHashedDataMap { dc_format: crate::format_from_path(&path), uri: path, hash: Some(Vec::new()), - size: Some( - (file.data_start() + file.compressed_size()) - file.header_start(), - ), + size: Some(len), data_types: None, - // TODO: fix error types zip_hash_range: Some(HashRange::new( - usize::try_from(file.header_start()) - .map_err(|_| Error::JumbfNotFound)?, - usize::try_from( - (file.data_start() + file.compressed_size()) - - file.header_start(), - ) - .map_err(|_| Error::JumbfNotFound)?, + usize::try_from(start).map_err(|_| { + Error::BadParam(format!("Value {} out of usize range", start)) + })?, + usize::try_from(len).map_err(|_| { + Error::BadParam(format!("Value {} out of usize range", len)) + })?, )), }); } } - None => todo!(), + None => { + return Err(Error::BadParam(format!( + "Invalid stored path `{}` in zip file", + file_name + ))) + } } } } From d383ce81b5cd178a055801b3495cf273bcbbbba5 Mon Sep 17 00:00:00 2001 From: ok-nick Date: Fri, 12 Jul 2024 11:52:27 -0400 Subject: [PATCH 16/21] Rework collection hash and add better validation --- sdk/src/assertions/collection_hash.rs | 162 +++++++++++++++----------- sdk/src/asset_handlers/zip_io.rs | 1 + 2 files changed, 93 insertions(+), 70 
deletions(-) diff --git a/sdk/src/assertions/collection_hash.rs b/sdk/src/assertions/collection_hash.rs index 03148007f..595332438 100644 --- a/sdk/src/assertions/collection_hash.rs +++ b/sdk/src/assertions/collection_hash.rs @@ -1,4 +1,5 @@ use std::{ + collections::HashMap, fs::{self, File}, io::{Read, Seek}, path::{Component, Path, PathBuf}, @@ -15,30 +16,35 @@ use crate::{ Error, HashRange, Result, }; -// TODO: which version? -const ASSERTION_CREATION_VERSION: usize = 2; +const ASSERTION_CREATION_VERSION: usize = 1; /// A collection hash is used to hash multiple files within a collection (e.g. a folder or a zip file). #[derive(Serialize, Deserialize, Debug, PartialEq, Eq)] pub struct CollectionHash { - /// List of files and their metadata to include in the collection hash. - pub uris: Vec, + // We use a hash map to avoid potential duplicates. + // + /// Map of file path to their metadata for the collection. + pub uris: HashMap, /// Algorithm used to hash the files. #[serde(skip_serializing_if = "Option::is_none")] pub alg: Option, - // Although this isn't explicitly defined in the spec, user's MUST specify a base path when constructing - // a collection hash. You may notice that zips do not require this field, so we can make it optional, - // but that would mean users can optionally specify it, which isn't true. + // TODO: in c2patool, we need to redefine this field to also handle relative paths. // /// This field represents the root directory where files must be contained within. If the path is a file, it /// will default to using the file's parent. For more information, read [`CollectionHash::new`][CollectionHash::new]. - pub base_path: PathBuf, + /// + /// While this field is marked as optional (it is not serialized as part of the spec), it is required for computing + /// hashes and MUST be specified. + #[serde(skip_serializing)] + pub base_path: Option, - // The user would never need to explicilty specify this field, it's always recomputed internally. 
+ /// Hash of the ZIP central directory. + /// + /// This field only needs to be specified if the collection hash is for a ZIP file. #[serde(with = "serde_bytes", skip_serializing_if = "Option::is_none")] - zip_central_directory_hash: Option>, + pub zip_central_directory_hash: Option>, #[serde(skip)] zip_central_directory_hash_range: Option, @@ -47,15 +53,15 @@ pub struct CollectionHash { /// Information about a file in a [`CollectionHash`][CollectionHash]. #[derive(Serialize, Deserialize, Debug, PartialEq, Eq)] pub struct UriHashedDataMap { - /// Path to the file included in the collection. - pub uri: PathBuf, - - // Same as zip_central_directory_hash, this field is always recomputed, users would never need to specify it - // explicitly. + /// Hash of the entire file contents. + /// + /// For a ZIP, the hash must span starting from the file header to the end of the compressed file data. #[serde(with = "serde_bytes", skip_serializing_if = "Option::is_none")] - hash: Option>, + pub hash: Option>, /// Size of the file in the collection. + /// + /// For a ZIP, the size must span from the file header to the end of the compressed file data. 
#[serde(skip_serializing_if = "Option::is_none")] pub size: Option, @@ -119,11 +125,13 @@ impl CollectionHash { R: Read + Seek + ?Sized, { let alg = self.alg().to_owned(); - for uri_map in &mut self.uris { - let path = &uri_map.uri; - Self::validate_path(path)?; + let base_path = self.base_path()?.to_owned(); + + for (path, uri_map) in &mut self.uris { + let path = base_path.join(path); + Self::validate_path(&path)?; - let mut file = File::open(path)?; + let mut file = File::open(&path)?; let file_len = match uri_map.size { Some(file_len) => file_len, None => file.metadata()?.len(), @@ -150,11 +158,13 @@ impl CollectionHash { R: Read + Seek + ?Sized, { let alg = alg.unwrap_or_else(|| self.alg()); - for uri_map in &self.uris { - let path = &uri_map.uri; - Self::validate_path(path)?; + let base_path = self.base_path()?; - let mut file = File::open(path)?; + for (path, uri_map) in &self.uris { + let path = base_path.join(path); + Self::validate_path(&path)?; + + let mut file = File::open(&path)?; let file_len = file.metadata()?.len(); match &uri_map.hash { @@ -207,7 +217,7 @@ impl CollectionHash { self.zip_central_directory_hash = Some(zip_central_directory_hash); self.uris = zip_uri_ranges(stream)?; - for uri_map in &mut self.uris { + for uri_map in self.uris.values_mut() { let hash = hash_stream_by_alg( &alg, stream, @@ -251,7 +261,7 @@ impl CollectionHash { )); } - for uri_map in &self.uris { + for (path, uri_map) in &self.uris { match &uri_map.hash { Some(hash) => { if !verify_stream_by_alg( @@ -265,7 +275,7 @@ impl CollectionHash { ) { return Err(Error::HashMismatch(format!( "hash for {} does not match", - uri_map.uri.display() + path.display() ))); } } @@ -281,42 +291,30 @@ impl CollectionHash { } fn new_raw(base_path: PathBuf, alg: Option) -> Result { - let base_path = match base_path.is_file() { - true => match base_path.parent() { - Some(path) => path.to_path_buf(), - None => { - return Err(Error::BadParam( - "Base path must be a directory or a file with a 
parent directory" - .to_owned(), - )) - } - }, - false => base_path, - }; - Ok(Self { - uris: Vec::new(), + uris: HashMap::new(), alg, - base_path, + base_path: Some(base_path), zip_central_directory_hash: None, zip_central_directory_hash_range: None, }) } fn add_file_raw(&mut self, path: PathBuf, data_types: Option>) -> Result<()> { - // TODO: how should we handle if the path already exists in the collection? Self::validate_path(&path)?; let format = crate::format_from_path(&path); let metadata = fs::metadata(&path)?; - self.uris.push(UriHashedDataMap { - uri: self.base_path.join(path), - hash: None, - size: Some(metadata.len()), - dc_format: format, - data_types, - zip_hash_range: None, - }); + self.uris.insert( + path, + UriHashedDataMap { + hash: None, + size: Some(metadata.len()), + dc_format: format, + data_types, + zip_hash_range: None, + }, + ); Ok(()) } @@ -325,6 +323,24 @@ impl CollectionHash { self.alg.as_deref().unwrap_or("sha256") } + fn base_path(&self) -> Result<&Path> { + match &self.base_path { + Some(base_path) => match base_path.is_file() { + true => match base_path.parent() { + Some(path) => Ok(path), + None => Err(Error::BadParam( + "Base path must be a directory or a file with a parent directory" + .to_owned(), + )), + }, + false => Ok(base_path), + }, + None => Err(Error::BadParam( + "Must specify base path for collection hash".to_owned(), + )), + } + } + fn validate_path(path: &Path) -> Result<()> { if !path.is_file() { return Err(Error::BadParam(format!( @@ -360,7 +376,7 @@ impl AssertionBase for CollectionHash { // We don't need to check if the zip_central_directory_hash exists, because if it is a zip // and one of the uri maps hashes don't exist, then that means the central dir hash doesn't exist. 
fn to_assertion(&self) -> Result { - if self.uris.iter().any(|uri_map| uri_map.hash.is_none()) { + if self.uris.iter().any(|(_, uri_map)| uri_map.hash.is_none()) { return Err(Error::BadParam( "No hash found, ensure gen_hash is called".to_string(), )); @@ -383,13 +399,13 @@ where todo!() } -pub fn zip_uri_ranges(stream: &mut R) -> Result> +pub fn zip_uri_ranges(stream: &mut R) -> Result> where R: Read + Seek + ?Sized, { let mut reader = ZipArchive::new(stream).map_err(|_| Error::JumbfNotFound)?; - let mut uri_maps = Vec::new(); + let mut uri_map = HashMap::new(); let file_names: Vec = reader.file_names().map(|n| n.to_owned()).collect(); for file_name in file_names { let file = reader @@ -403,21 +419,27 @@ where let start = file.header_start(); let len = (file.data_start() + file.compressed_size()) - file.header_start(); - uri_maps.push(UriHashedDataMap { - dc_format: crate::format_from_path(&path), - uri: path, - hash: Some(Vec::new()), - size: Some(len), - data_types: None, - zip_hash_range: Some(HashRange::new( - usize::try_from(start).map_err(|_| { - Error::BadParam(format!("Value {} out of usize range", start)) - })?, - usize::try_from(len).map_err(|_| { - Error::BadParam(format!("Value {} out of usize range", len)) - })?, - )), - }); + let format = crate::format_from_path(&path); + uri_map.insert( + path, + UriHashedDataMap { + hash: Some(Vec::new()), + size: Some(len), + dc_format: format, + data_types: None, + zip_hash_range: Some(HashRange::new( + usize::try_from(start).map_err(|_| { + Error::BadParam(format!( + "Value {} out of usize range", + start + )) + })?, + usize::try_from(len).map_err(|_| { + Error::BadParam(format!("Value {} out of usize range", len)) + })?, + )), + }, + ); } } None => { @@ -430,7 +452,7 @@ where } } - Ok(uri_maps) + Ok(uri_map) } // TODO: blocked by central_directory_inclusions diff --git a/sdk/src/asset_handlers/zip_io.rs b/sdk/src/asset_handlers/zip_io.rs index 8e27e921e..745b74bd0 100644 --- a/sdk/src/asset_handlers/zip_io.rs 
+++ b/sdk/src/asset_handlers/zip_io.rs @@ -254,6 +254,7 @@ mod tests { use super::*; + // TODO: add office, epub, and other file types for testing const SAMPLE1: &[u8] = include_bytes!("../../tests/fixtures/sample1.zip"); #[test] From 744045d98551e376570df7694b99a5e2d33328ca Mon Sep 17 00:00:00 2001 From: ok-nick Date: Fri, 12 Jul 2024 12:13:54 -0400 Subject: [PATCH 17/21] More file types for ZIP unit tests --- sdk/src/asset_handlers/zip_io.rs | 74 +++++++++++++++++-------------- sdk/tests/fixtures/sample1.docx | Bin 0 -> 12980 bytes sdk/tests/fixtures/sample1.odt | Bin 0 -> 5651 bytes 3 files changed, 41 insertions(+), 33 deletions(-) create mode 100644 sdk/tests/fixtures/sample1.docx create mode 100644 sdk/tests/fixtures/sample1.odt diff --git a/sdk/src/asset_handlers/zip_io.rs b/sdk/src/asset_handlers/zip_io.rs index 745b74bd0..9e24b0ba3 100644 --- a/sdk/src/asset_handlers/zip_io.rs +++ b/sdk/src/asset_handlers/zip_io.rs @@ -254,59 +254,67 @@ mod tests { use super::*; - // TODO: add office, epub, and other file types for testing - const SAMPLE1: &[u8] = include_bytes!("../../tests/fixtures/sample1.zip"); + // TODO: add more sample file types + const SAMPLES: [&[u8]; 3] = [ + include_bytes!("../../tests/fixtures/sample1.zip"), + include_bytes!("../../tests/fixtures/sample1.docx"), + include_bytes!("../../tests/fixtures/sample1.odt"), + ]; #[test] fn test_write_bytes() -> Result<()> { - let mut stream = Cursor::new(SAMPLE1); + for sample in SAMPLES { + let mut stream = Cursor::new(sample); - let zip_io = ZipIO {}; + let zip_io = ZipIO {}; - assert!(matches!( - zip_io.read_cai(&mut stream), - Err(Error::JumbfNotFound) - )); + assert!(matches!( + zip_io.read_cai(&mut stream), + Err(Error::JumbfNotFound) + )); - let mut output_stream = Cursor::new(Vec::with_capacity(SAMPLE1.len() + 7)); - let random_bytes = [1, 2, 3, 4, 3, 2, 1]; - zip_io.write_cai(&mut stream, &mut output_stream, &random_bytes)?; + let mut output_stream = Cursor::new(Vec::with_capacity(sample.len() 
+ 7)); + let random_bytes = [1, 2, 3, 4, 3, 2, 1]; + zip_io.write_cai(&mut stream, &mut output_stream, &random_bytes)?; - let data_written = zip_io.read_cai(&mut output_stream)?; - assert_eq!(data_written, random_bytes); + let data_written = zip_io.read_cai(&mut output_stream)?; + assert_eq!(data_written, random_bytes); + } Ok(()) } #[test] fn test_write_bytes_replace() -> Result<()> { - let mut stream = Cursor::new(SAMPLE1); + for sample in SAMPLES { + let mut stream = Cursor::new(sample); - let zip_io = ZipIO {}; + let zip_io = ZipIO {}; - assert!(matches!( - zip_io.read_cai(&mut stream), - Err(Error::JumbfNotFound) - )); + assert!(matches!( + zip_io.read_cai(&mut stream), + Err(Error::JumbfNotFound) + )); - let mut output_stream1 = Cursor::new(Vec::with_capacity(SAMPLE1.len() + 7)); - let random_bytes = [1, 2, 3, 4, 3, 2, 1]; - zip_io.write_cai(&mut stream, &mut output_stream1, &random_bytes)?; + let mut output_stream1 = Cursor::new(Vec::with_capacity(sample.len() + 7)); + let random_bytes = [1, 2, 3, 4, 3, 2, 1]; + zip_io.write_cai(&mut stream, &mut output_stream1, &random_bytes)?; - let data_written = zip_io.read_cai(&mut output_stream1)?; - assert_eq!(data_written, random_bytes); + let data_written = zip_io.read_cai(&mut output_stream1)?; + assert_eq!(data_written, random_bytes); - let mut output_stream2 = Cursor::new(Vec::with_capacity(SAMPLE1.len() + 5)); - let random_bytes = [3, 2, 1, 2, 3]; - zip_io.write_cai(&mut output_stream1, &mut output_stream2, &random_bytes)?; + let mut output_stream2 = Cursor::new(Vec::with_capacity(sample.len() + 5)); + let random_bytes = [3, 2, 1, 2, 3]; + zip_io.write_cai(&mut output_stream1, &mut output_stream2, &random_bytes)?; - let data_written = zip_io.read_cai(&mut output_stream2)?; - assert_eq!(data_written, random_bytes); + let data_written = zip_io.read_cai(&mut output_stream2)?; + assert_eq!(data_written, random_bytes); - let mut bytes = Vec::new(); - stream.rewind()?; - stream.read_to_end(&mut bytes)?; - 
assert_eq!(SAMPLE1, bytes); + let mut bytes = Vec::new(); + stream.rewind()?; + stream.read_to_end(&mut bytes)?; + assert_eq!(sample, bytes); + } Ok(()) } diff --git a/sdk/tests/fixtures/sample1.docx b/sdk/tests/fixtures/sample1.docx new file mode 100644 index 0000000000000000000000000000000000000000..919cb20c00d0c6de08b5274e0133c6d099673c4e GIT binary patch literal 12980 zcmeHuWpo_Ll66b6#bhxvGm~Y>VrFJ$X136>n9*Wpw3uy?#mvmi&|+=R?3t3=X7ROW!=cm>WIk98`1Jo5O2@`kN{`^06+{_%b&E?1Ootw-U0xq0BCS6Av;@V z6I*9JWeId&ggaS|-!1*a)>vqb42{W`SJL+^v>tOW05 z`54EgKqvh3C>$*Q6nV8MsAFeBXEp`epM4~SZU!acRo=?l9?jMHbh1l$>(?p;69+& zvYw!Gj+3N<5Y`Sed#~HA_1NYPNc`MZ!0bM2zX_kVdu#eUNGd@+7#OaB9yW#p8G9F! zGSY>*LnC~2%{3##;kNJlV!%P96ISX2I* zWLEl^=pU=KIz7sV@l7m|meehw13aMN`Ju0RvhzuU*G@i&EQ(92*3Sq~ln8qCek(U|eSjl|f_ z$i)UUd;KJHiILudC~UoOTdAd`iUp_OB7c2V9cs_*%wqQ#@ zLLy3;FYY5EvU|LF|Ab=Zq{cmp36BR{zUM+bB}8nqcU88a7;HXT^Q;t-B2iQ|bppRfW53N5kwnu5WdfH@6lY|zpJQ}~DO9+R zIP#FK{|5Sn>HM+y+)z2FT zy+G5uFe3<|MB=EKN7Yq;x?%Thx&AJ+G@~VRO5zzQFL(v2|CfW_AS{R2NfK$Tu#{FU z6^h805s9^y%;|@by?lXi1Jm!z__rQ9-%3imFwOYKRj3j1wviWusBn0Ue zEcSVquIZ&~a|%3wfQZC--DHBr6agak)L9D=*S%a*ww*SqyQS*^!fL&9G#$*@a`K5p za=Z^+L&w@a>V|rPK2~>k`a0^mqHiyOrWs)02fgE9-a`@1E$sZ|G}+b62Sl@vPH>sg)q9 z`N0^IFtvW6)pohgY-+Y4%i_x6nLZjIfz@pd&e(L_zkJiyZ-OkkG96}n-oT`KSr@To zqO76Nzzio;-z?T(rlnFp8OBDwtK#EVp*^(vDG-__gzydO*w+B#nQx7bSv()NG12Q$B?x=}~~V1x(&K>x!YoXt&a zOc;MJnSb-4#~LHyxSVL6m=F9R&F=}KJCof5`M=4y#*-iAZ&NK@M$CSlAR1_8CYFy= zV*D(q=}CP(D@2XJHa;%(n({e_WSn4vKk0 zHKczGaW7KeHd;WqN5{z%kCLH=%ZZWM#iWGKlx!!a_N>&R4MrA24AG~WSsZA{EQi&A1Kf7xC?NDfuM}3!{Z(c%0!%i*Z_bx$3W2c&-ltZ z04*Kpg@vQ6MQyWRlN_b|2ZpB{hY2g*u5`>5C(J&u`-RwOcjoX}`QQNhjhZKeq%gEf zXP1K;drUtLtUJ?XC4Z^D@}p6wYKjim z9YK=7O6yLtQwR6}sT&9$khj3mqH4!xDRqs*QeZp=Yd33Y+uj&dRBTAmf^sju-Kz;5 zO^qw==gNS_1#b8*j~Y)(`#nZNAi*xPKO9sKQ?9mat8NJyMAfF}QFH^>vH%llhI8b- zYVsz;UCM@fT{^B}S-gLn_r>P%#nrd|&WsNpj<O3$!L^O7Q^65#B0Jc_Wdyuu zOMt7B_7gomAqxiU(Fk?jcES=MHxdAf=x5u}6ZvCkL#4X5^&7WTgH?@1Bm58pOM|lp 
zTZI>daQ{esAaANOCBv>Z3w!mmQEqfsV;LV>&f3Y1vu!!6pXIbh#)R`D3HTwW^bcDI&=UaQK=i2Ib2`?PhPWDL7R5c}M zIjE@AKu9u<#1JggHpD&fVAVb*GL3CMH2=u$iGWCiKqul4C!~S`z5!f~1Iwmxfu}N5 zS4lG`P{!}Ric_2q2QPm~Fpd4da|Fp0QIYO|W?YkXkqWN>jaX70m%WRqU<29)?X)}+ zhz7Tgtu48k;M78e;H0uHC4HP;j)r6T{IjraM3A(%0lPs|%huH&9Qqt-q(I@E;nT;!*2Zv zRcty5E~*Dn!QfSw#01o5<4xNk}y?Yf6QH0vt9 z!wX-h@~cTN%r`r~SF9fskZz{Sa&A;<#38ym_mfQmx|fblx}_-s!s^y4Dj!WsDB@N% zKeR*Zl+DrW=`~bBQ!O+S$EFUGDKx|sSMiJy=cu_7(|yIjKq%l#{`{ipDMBC%D7Ct< zBsU=ImI=NVzNe1W^8o(%kYOh6w2*1yA=7(f$ZHexGbl}h1U1;7mzSfJaei)Ox&ZQn zNtg}JP2ZREdPx%8W`AK*xb(pfYL$}gMp%vMB~J@vgx%_gTQo`nn_~24bV&>k7HW#rcUJdj9pq*E}& z_mQi%9B(G`L6esK9x}XuPlcdTk(>ZVnG#;lPMKm7@VY|_IRLh)5nhI%r>zU?t+LNSN{7-dElA*wdv2D4LRxDd5Z%S7JmWprT5*>T#*-B3w zb95?_q@FDYPF&i=CaFlErDlvz{hw;&FoxWKExt$3K20?BRS_2I`7MYHmIlu+9osvd zolxqUy9Oh;Y(pcm72|Y`P@g5Mrl{Cj>t%D)j%K|AbsHDW5ec~0g#&3%92*snIAQA* z8?}n3wM9B7=%#5i=PIX{B0m*8n_E~lU(Nsr+rG}7^8e!m2nSsJu?N~h$o_Rf=w#yT zY+-BW^xNcDrz&H&#D(0>SpE8a+yh^umhQb^28rvTOqjctGQ+L3M1n{VA-KeX1LotJ zn|NqaD-3L_2m=he-R_9}9oKa$a4Z7RRh@?9ZW*ppbQN%=jTU6Lw>mds$#n%ONvcak z!ajP|irebcTtuQe{z1Ud zJWS#KWwn^rrQCvmbA4mAqC`0cQfNKo9_~*a5k|8PcgP-tLSaMho1{nwxVSz1YQr%8 zG|mb-Jt!?54-^;{%ET_pu?9WK=%_+2o`9i}-_~MtxWV5XmVN;#g#hD@Rh- zqf!=O3H15YCWd@3x5vO$T1R15wv$h?R0y0x{N7i%QT+J|RwBWQZVG3os{u`0N+3gz z_yxri#!2i){XWSt+M$}o`w5}|Qb?_5TgeI$e;JcVvc0TOnCgYV0gJ&`N)#r4a_aNG zI<6V0Fg1B^$&L*zU2p3IiY{}r8Aj#vJa8t?RDORdjxRV>Op^$82a$0L@#+eSRiO!j zeCObG7p4Q^8ip`1(B?J>m&A(4Hd$xPH0v8)d2NwDd;?jR2QDCu@9U5?;aq)-Y4>lc zGV*6yeQEUEJK%OO^n%94<|Zt;GZ?lS6a#O?-7(*W?^yl(V!{mY zO?6f+qK->ft^}7Vw8OE{VhpUw)lW)cg|-vz!Y;HDdnm#Rm=aB&$c7hG;tLqUx`%oF zadf>GC6fFK1R=)HXk zt|eUHIIbQyGzecnN=VD_cWd0Y$@!LX7nxzRyhLIa?zz;nK+#U`+Au#)lR}6Re2)sf zL?79XmJs%V;neHt^FZW%dRJ%>RMO4VV`(pyg8h^w*0rYimuG_-B1za-{**?lL)~Bt zHZ7TM6*#$Lh@2CXpxIB~cfhH&om1d4>T+JoE6t`BSTf=BmY4`eF%+2E@JIOItMYd4 zJXur|^w3IfX0lqN2e~>+NFUN$`^w9DGa@<=W}V z{pp6|qVtVT;wntthN3YQtl+kPcAP`a*!Yp_$r|yPItdnafge1({y0r%hh&Ll@$`9l z0k{n}))+TTynJF#U_H(E`Edy4o;Oj*vda=W?~XLv`@_Zqj=}m3ntIUHe}s@Xv0u;Z 
zhm0mgL?s;-4~_aMz>_Nzm48=2-UzMFrgd`2R{9i+-WVAWzE2c&-MB%jKIbCO$TNLH zxB(oQk^+|TVcMvo$!D^BRkxM;sl=OD$T`sohZx}p5BZ8HVMY?KWQt$$b0?f9jrG$!QWGuj*cm@`f+lv<5CFPs>OD^IzA0MKH^4!v##8LGnIn; zYaDL034Y|I6oRy>IsRatn!YdB^U0B_7$|C0*$H)m74` zj};|`t9;zV>W@Qqv>2N3C-Jt(ahP{8!fbJ57s3m@1`-f%N(_Fys}D$Wv(LS09QU+2 z95o*VEDtLRXOd}WFfVMvl5w-+I2)PSrqty5IqqO1#-g418R8Iig40pImo+LE6+NV3 zsWu%V#Nd$+pNguZd2+h=}FO9z|N#;gYAm;qe+UM^7On0H1 zJ#Cjy&)mgJV%wu&8?tT4RWk16m$Khfa3aBv-0d^Q{MLI9#cO^OwmC1tp`%4iVStxg z`$}P843;4AUhFfZthy;unu5n(5Z@HnYa)`JHj8-2H@^EEas^*|vVJMv{?ASY_7~o9 zq!ji=T$hIk9xFi-xukj-j-51_Rd^nEt)#I|t)v^qWQ=$`0>Q5rK@ni(zW*3IID@5@ zl?AECCr~TeAL`*|VyN_&bxrz|o)svL12_kM?+_YjEgVFnRyBa{*X`3^f(kj92_|oj z3okpJI$JDpx-$gEy#luwh_7}=7q(Hsv9Fu45R_v5=G3t|2YMfuESXSYM0VgzCWIJn zKN>bSwWo4wKpd%s#L6lD2rC*s&{k%iO-;zBwGRtc58N9;2~>(LG9b->C0z2%o|vKe zT3mRMt2CuInESFevP|QAOju4!SSnMBiXGa5q-wDBw%+v1!(1RcS5CmdQYHKmllJ~?ifAV*lULgF_^s&Ar7u10S0Q3|90HEr> zSw<&k4{MX(mz2jkOO8vTC_ZbY&roPtu7r4@BqD>QRMb)pvz7JudjP2z~-^{jvjoHM+YB2&)^fe zUd{0r;}N48r`UYjr0}YPm5HzdVA>5Wzea@y!Du(ta8{ITR5T?7M7*nj1V;Gyo*k<1 zw@ru2<7h#mxqzxY0ae|iY4`deAvZz^RsaV%ElkT(_edNRT@P4W}Sri(zKYft{ znLjAX|L}G%=c~wtT}Okm2M18_`Suv3V@Hr0X!OHC;Em?@zy#t%X_;99G-9Bn&;})# ztHdUgcB}_#P4Hv4cw97pe5lyQt9ZIXa&w5Q^&pFRbCgLms?>Gq%lzqH%x62=vK8;g zI%UphqKmc5w@bnv6rF{fCHIycCH!*|>1M0l+(S=HZRq%D73G2x>yMlc7>GRtqqR>m z7`1&FQujFy(OEn7BgqEHh*8GjK;FpS(<-_atn_`Eg{+-mhngKpS4%6cp3PQ6+-N0m-8~jd_b3UOg)Uf)>^()6~5QWGKVVsNhyVJb2R!Ma|=~azADj2$1=< z!<6+U-uuE;=cKMvJd&HSoO*WaK+pJ~^@P^_w9K>VTRx7^d4`=C-1msqyHAKknL4MSj2k|DN+TJ`naG96$dkIl8P&l8N zftw?ivg>J9u42KxTYl~ju)=ZK@;q!y+fvkVF;^4)}Al%#}B3E<(0dHs8UN_Z%_S$Qh>6VVa*=T&6CV zW^#t961$+BM2O(FXP;DYBmsfdl7KmbudxH!*hyW1{6jnC`SB!|D(%N>HLzu!<*s=s z7C?A10mi(Z&LWuZv#eQObc+QayZj@vusA4!apKWSwuuhaKe&%Tjv~@mTPqfO+vWz643PHH1ZhO>{bh9OP z`96*Az2+aRI{dHIFDyRSqVNquA zrk;{Ek3NBwok*;4F}PK(+Y zPJgnRj*v8f;%qVLkKTshJd%4iKFXu7J-Nr20wM__3USCYv!xJGwaXo@Hd(1HW_fOy zwj{e(CMa)t>mvqiX)kvczTOu~Bgq0?XuLkbpCvVOY9-CQ@Cs<65g5376dJGhM}FwW?YnJ 
zdVIUmW7ODR{4**bHXgs=Tjepb4ftcEFTzeNJ}R4v*Uk(o(P^28bJ5THBZ>)^#|l*0 z6}!U+A`DrFzIM@c1t6-LmO>w2bVhe1ey-DUjgT|qN7oS9*q@eBuoA8bqcsDF6Wrnp z#N!jN1n=x8U>CP?(d*noRgrtiHE-14&J*)6LLDQU=LkaA$$OQf9V1iafICs<2%^== z6Ti_cMh2%>ZUS_}pf7F(!Jg;@!76BjKI4L){{{Kung06rnPrJ$!Oe-s)fHxwFS0XTHQ?9Z^o7(B{4ORuH(JlCr##ce@0BNL->U+>-Z8{)uUfTT*ymqH+7fN>!lJ6=Hl(XoZIw!q z$W!w9Hd}Uf7e%dJXfk*M*+5AOxla8979_$rv(4Nv6FgHPmbm4#9b zN?b8Z@xi3nh3#{3x|);_a?(qJK`q@~Aq6-j`X#PNqpoi#OTi-E&ma$`T0-xLvhyYw zlmtVomlA^~9cDrq)BdGNd-;EwEJN0X+5H@fa0IzQLYIpnHCVn{hWPz1G#FY|83N0# zlsJKP2DvoSQblAT7pK>L1g{ag40{vtC%cfgeI+)Ju@t*izjaCQdJ4TjCG4off?@_6 zXcV`{&^i`%q}g|MKh{I*g>tYPgqjs1hmyV)|7$1=TD6LxYt52A6R8U2ty=Q;Gfmbd zp_bYW)57&$_6?+5zdins&Fl>p&pCdr$ypj z%=si7dAVBFj&=Rfl?9&X3*i@C#wUCA-R``fU&E**!~|%9oJ? z&6Ql1PFID49ak7;)1on&6!m`!vnQglLpaxd&Kd4SY3;7cAuLuF<9TI^Ld zPqKH=yFOgZF(GI3aK}!g#WJ1Bp{WPe|6=ZepT2xQY90UBxC-59qS5|N*0zIg4SG*1 zG5NWLK#p>db2?h(O%oPV!K>xBHYyQ-j;>YE;akVvf!znV?dyicvK8(0y`Do}iZ>Hn zV-iq%CB`MbjpT!7u2lwNKpK*6w_ROQA=j1m>xRrz_S%-@=H)<;J6j9&LRYeNsPd6@ zhF0TdrfsZH(Ib*Ho51^PvFxaa<|Ed;bIZ^sC;%>dj4BCV_+H z4Ybn=MxS1KK&vciH&Uk_3Qel#-lfvGE#|f z%fXij9d^?pUY=dkhhT-U0$X5Ho#jwy@lWYL_?h<1R7Xb+1B0)=VfJzxt8(87)cdd` zzP6k{B^izY7YDzk`ru^hb=qf!mw3y*onk!-moVw@$ZjckG0}}s=35YJ$FN9t8d{2{ z^G?m3Sf1T+(cs`m9|hk0dlS3zLg3izP=>jr zi^-(u0nEFC!j9RS{i|oz<)Y0HV|7d@8~ErDERGv#&)`m#O zP?U~5b0+V0$@V3qB7;~-P=*!G8FRwTMu9-3+;9dCNnKP0L(`Yfhnw;{Y|r}PE6uZ7 zX@yu?VoKuZ1_&b9@zpt&#wxL1dp%8zdU_Y2y@GLOepS)#TWx0r7Y&3X73Nc~%%pfN z`lN9BBGcuYoo}n6MJ-{uqMZA*jb9lT7JO4JjyAZhFf8s-PILkW@zE6NyEJDkX{4#@ z`_d9$kVVfuiybM)>)NghR65p-!NA)L%^jn=ciXg#7nXCYoEuk$P7xTrNA89b*sWAM zA@!57+Y<#VU`u35DNXUW-1Tib03+s?8&61`U9l2$YV80?8x7;A=$%dmJjK24nI87I zDK%F1;FI)Y^kD(B$_4+OgZRUCoK~lTQ^{i2vnz8hbW$a8p=PsUS(gD+l}l_7{8?}O zl>)pv4}0xGc!2XpDyn^qf@=(*ys}5a9<-^)+P%S5oImb@X3}98O+FHxF&17gt zLm!`KD8*;aST-bf+ZTNYt+uzJHuP9BscuRv+{&Yqixjh($*Cb&YkfFHf7&fpSmL}- zD_$*TZGA|4kXT!Pl{w1kiq_O=JloKhBJ<+C?LFvV7gZgbt&`Ee3GYy-<=Xoa7U#55 znpz?KCUHNk!eSSZjygP?+Re1?F5&t5pP9ICW@TYEAgZMo#1td{l_@r`xBm}O4B|fi 
zb)+i}$%0bP+DI>u6izFYf@p#oD&=W={KX0&JsXZ?3^(SJPE=3UzF82xX^NdXJw7!z zHB=|iipOUXJ~<}Ik_qP(qRp6W(A}=udETZNGel8^*`i~(MD6n)U}V%sP4ACk>x-bW z*4~zpd`YUI!nGDjpPZk}cg#T#U6f~4Q_l%HF-kQ|qB=Hg?>R(}cv-Nvuu&HecQf$)O9zQZ3Sq#FN4E;s6)DcuVsT zp!`+=Y9md~LI_HW8ou%;PgPJT^f9hqqE&mfaDQ;v-Rf2u<4TLv#?>|@S8dZ&$$}^G zMe@QsL@hyu*av|wKo`Vwgg%dJOFesORYqqZLr3F_%H@K$=D6GF5gh4Ux6{U-IX+^KSex%IQZX1{55d=EBx;< zr9Z(Szkvw={9A0q(Q;~2I)@e zhrbuTzwiIfT4&vR&b{Bg*13D{d(Qr7s9<2?0sgZw0gQSD@j31WzdO!dx3{*pbn|wy zG<9;avo<$%vvzdg_i(V_b2N3ecI9()vUIR;GV=J}-N_2yKK~mjF=<%K>4GSfRs4u-?6S(T;mhzSMr4 z>Wu2&dbG6nUDV!LBj(Kj)8p!>?{tpFK-=OtysTE+ew#|x1eZ>gozr{gS6-!=idZp^ z2O>R-nUHw>h@FPO_0nGx>p+oK#a)4diB4?&y1n#+JV8mE)#RFMx!GJ9{Vpv(oejMQ_yjwh^j`4(NeHWA;OZRaW4X-6gp4rM|zPt0+?JWl2Up+c37|j3u?ui9D06=zkeJ-yp z&8wvLjNjhW!TOb@tJ`naevav|=@P&X*$Zq9Jdix~peayu^aOYFnO0CZn zQf#IJ(jBu8EMH3Hb{!a0i7yQm#ZG@)RpQRmFwY?@MzxUTg#M(g8tVJ_EL-y_jt&mh5=0GAxU6 zemuLm)nKA3&_CP6P?HD&Y#=K+;H&yyu7-pItVT~yKWQj6g>AIG3v!Tamb?udta`QL zBdeKby!&G&09_c6GPf|NAXoh(|BX-$nNg#($-XrJLzLkqtq7NWY70g+VDs zWQRbrcLdfpZ@omGDk)f`Zx?OPFLIlG`^q<>!y+*ur&U@<*S&F$k0IlN?;K+aoXf3c zA*S>c#FF5^b%;6!Un|}ZHBk9Mb&squfQ9g6P9$F}GXcOJN$E3^eCj80@M^X)xVRrC zEHn@9d$`x}fOH5yD>rE2MfJ#Q=~pGw#cQ+rrKDum#MsX^1ZMe!>aw*o)=ktlU@>Iz zpT#o8^ZE79A3p!?MuFLL;FagkHt>2ljd&qOJmc-IQ4*6Z1~rd@`PMgOC~AnRJ%o64 z*)WlV=GQp%<=Dr(_<-zAg3YnQ$jlp+0FoE?*6XV^z5$FEdy!F$PE7TgCgn9UJ!xQDuXvRUeNJ`#Fa|eKjhsH_6&AK^M^syW>}~dbDaiK8Xx3Gj8>EGAV{vZ#CsS zHvC5jKh~>v0-?4`1~L4-r!9!QjvwWYMzmd9i~MiKPsrvtDok$Zu=g-~fh4XSo%m>v zmoh9-pO46nbyv4D6)ZCfy*yVhm2twLXxmXK$^;tn?{44hIU*Pq+y8Ag6k|v;9^J)8 zJuUzMx_iUi(ZTKapYV4MyqI&C6TtUnsCY+IW7X$8Mo>yzNGY&w%?r=I1bk1V%dTTp zbVfMo7U~d6|2##hX%rTFAl}t({8rAB(puI_ ziMqo&H`vORuHcB~`~7RV_@f(Jm}d49_ORgBwVL!vdHR|4pK=vT3pq&0x@!`ns z9}+faZUQ@#8-Pk=->|qeO|Nlz@QH?P4=j3mxdyp>?G>%-^j3ik4AUOV8NK{CeOQBU zj8~l~&qQEC*A7lsj<5Og+a9gIKV46P;kL^hJ9evnV)hUcvJ~!4CT7t3kyQ>0 zckn;qt@x2cNYxyWv!{v(NiW~`wasj7vAjg$(Gt3(kP6_LLpeVBcT&EPD@%zpo1Y8T z%ZRI7iskgtb&Bp+*}w$`JY>lVW{EFWj7=pt?Xzl##dmRz91xIWs%1U|Y@Q@qv&G^$ 
zP!0;LzhfIS&^)C6Nm@XlZJVpN(fwmUa%pGb8b8D!e$85S`zedaB;7hBe9)_kj|9I? zlMYLdfJt)XCzS_A{tQj=SH9CQu~y$qG63-V zc;V{iZD;w<&c9-?6@{uJzBS6a4b1BDow(RJpS^QY&r&Y>gs?YX(b1LVp* zFQ+|4S2W^z<_^sxciFHpp#%(FDM$%L=c}dAHTPx7gO)IP@gpBkLnO+YGe^{L_zKx} zO{@B*JensG%B=O|$k=P?cK@LK-8mz9xai8bVo;i}x}`>cQV<2bY7#jlv3Dz_ePF!E zMBYFZQn%Wp9Ri6th!0Ukw5fI;LJSN=LbwwVbwVa0~W};_2ty;A-o_IQzpDwH1UKsfSl*LKa+YuWb4tGAri|QLPc^;>Fa!_quCn9ach9 z7e&s_;ugEh&jgy+)sxyvSc3?f+vwn|*AH~)|DC1m) z7!lrR$&5R)-=o~Q)9#k$zjmPLlf{Tq4j<-onFq zO!@C!O>MC8yE~M#FMjY3mj|GA)#T#w-kl#z(z-3s@{CAVh7>^PdDi(TW9dX$_Gkp< zaShS9Fe1sBn)MA@;$)<>`RwK@bCehTXZh|9BAFOg9&W}6V!t|Xo+b5dfulHyXjkgN zST9`M$W*P4{9KCi3ME<{>NGEGkyyJaYkN@B=)!v=bBr%!DG}4EjZcLIW_n}r^aYXo zh?Iezx?bFx?W#(kKUXKb4?*_cJxR76bXa(N(CI_4OLd9RrA)iW<%>M4Y0tyL|-@Qy;(AyS?h@SF8XWxtI5Iw zZqnD%C4sI4Ojrh086Z^Tus*ue1+BP#zKwb6Ie6tj)VP=V<7#e?HH?%SsG`NOP{5Ao zB)pHjKi@DZTc;%3*s;y3VHSkbtyN)Rn+4B&S_#C%_eY2>qg!H8`+tpo5>9%|Bo7!? zx3j1iLWNOI^E`!@70mi66{VO2r?i_WWsBtKjsgj-66O!5*BOo@++k3|MTY^{>xEsP zcpYwNz<7XpbbkQn3kCMtrQHRb`IJcA{vy=nL;iKMl9*bz{bET!SyGHoTJxovI%XOB zYbos{QviL@^C?k((lNrs=oz~Er4{XghCxju^9|U+$sdbiTufd)cTA=H85B|;ML}NO zFSv>a{#CP?DvC47p}Q!ZSn{;ZA`3$j8cux)=+zbhF+J}ra?R*|qe+)bD^8p7g>R7* z^JVOb%Hr3A1a<@EFfH=rdj@2SsV0*5I9k)GPWq2Xmp<=IED4fe$%*{TZrYOs$*p9C z-Il!CvLfH8OX*%vt*M%_FlnQ!NT3Q-S#2bDtcD^8p2_6b;5c)>Z3B)^>sP#f{>9O5 z=DtS{(*=exQxG6UR_%OFms-^q7G7MKn6#l96vE_z;wdMr$}5+;kNZ97R4k`kurc6M z7lhp(@hF?tTZqjv9u5OoTp+b7{#0&!`7i*|R8`sL!M zN)`10Q?VsOJ7Ds$WB{A~Df+nt=jRNec+$89e{=1digMZgg*UwW6M?sFEOIBk*hG`6HDO~bwv0ATl$wceY~_kT zpnL9HBvC(S@)>Nlvdh9a;fjwlUykWf<4xTE4G7+f=E)Fh940!U9D8on1w5n4`sFh$$Ob3Qdf zbq&rwqNb*2n5tky>UE*gV85E(0J+#Fghxx^bYHGby`YDsOU}0ooq-Wose0X^=$kZ} z)im`h`nJw4zPxp<-*Sn3US8;iZ0q8!Ui+NviPz!tEseaKxLaaCX#EFt!S5xUC9n0h zg=e_zb=BR_yHo_%_%3rG-Rc*YOB5H0he8H5k5~qdkL`57?Wk;Xtq$6H%2}YE&m$K< zY95KNmJM%tJ7f-Cz3L5_Yv|qCCNbFIft8&jPg|!3bfIO@C0?xZ$F~u1Slht^k+iRJ zK6bK)MQ}=5-R{LHc!L@x8}{s*IG+RF!J+_qdWz$Hd%TOuBeL8M2-_2JzJt-Zc&~f2 zg?O*t7zm*1!O6OzIbu&gavfZv?zg{7yU)Ry0Ztxp?ojE02AGQWnski*- 
zqNMTVu-Aq}iD7i88l8S=QII=0ebth@euF(rTBaxW8kvSXw%J{-?5M;IK z3QQd+GEQDhk)e8{uLc=I9p%h>jdRsCzh-(}I(PXlz%(E0w)yQ0Xiqv#!hd6B(kF)U zVDLs+ceDP>_o8}@SGQ!L(31q#Y-Ni3Q9A5O6x3g`>AZgWOo1_2t%}PcTqlYeb;DRc zNSrN1Z4^|(kcw$9)^zv_PA~h`nj{9)Op3L#XfZQEXTZ4kO5$P5@$b>u z>PN6vt;xWrQbuKve!0F7@3=K$7SG~^*_>e`x38P;}`92xg7KIXBM zjAh-nMU&;Fj3M|+&3L3ludS#y7i(%bE3-n-;yC7@_gTyiv4JO}h&|{v-Y2zyVx>=q z-FBf4PPE6uv?@&)?mXZwdSGT0!^CMJn6&^)d;1eIaApNswE9wpi2bwQ{l;J-E%%^V zO<|x)e*j0K-Qo#ro}qZMv<_YT?bAq-2|8&yzj4U)oV1}r7VLP7ya4po5Ytjp9T?09 zH%9bp5-ceQshJ71MMyVeo5kYEYml-M@nGFvpWR-mBteQ;H%gj>Fxo>N=So8jb5QZ* zX}luYDYT<{jkGlOCDaBzHT3qz6`8`bZLgl#*7%ORo`PLM4?mBd@-Dp0-JvO~el}Mx zbu#)9&miL5rX*LH%~G&HB0i^Je1*O0bvi@|uOA|19H&sA`bK>tQHZYeX*en#Sh8YJ z2`9_YffygvBa|@PzX=+4<7&S!Jzeps3%0>Qo;*YCVrE5mn_psoB_+M5TJXW35z^;X1=WKsuu`cB4~a@r#gHc3?@oH@8YiRp-XvJmdT* zQbI#ZU5ds}FfbEapq)Hx(6+<9H~SRh)DwfYC{E~U{ozl@G<<3U1*dS8dKwHa%UrZp zq}#1*FJ0_eX1oXIiDEMK80^iiQCktTde9;q;LonmHB|iqk~iL{$C963`zj7425R=J zpY(m+GZ4fMraMysM0OT#{^~fJI2QpT<$} z3eUV|`SyP*3p8}zf2xSP3Bdn_l)q>HwEsqy8Y;h4|38iVqj-LI`@h-b-^#zK*B>Rr zUG@KO8Q9+@{)R<=OmN@fqdUg>4?6l=^*3PnqvFN-7u@i-@bBvDk5K83>i((4G*qze U*eu|V*a881cPuNH^!MmL0Hmf#h5!Hn literal 0 HcmV?d00001 From db83807e8f7242ed6dd6c74ca0086fa53dbb4d4c Mon Sep 17 00:00:00 2001 From: ok-nick Date: Thu, 8 Aug 2024 12:13:53 -0400 Subject: [PATCH 18/21] Hash central directory and add unit tests --- sdk/Cargo.toml | 4 +- sdk/src/assertions/collection_hash.rs | 213 ++++++++++++++------------ 2 files changed, 120 insertions(+), 97 deletions(-) diff --git a/sdk/Cargo.toml b/sdk/Cargo.toml index 27959873c..240d4119f 100644 --- a/sdk/Cargo.toml +++ b/sdk/Cargo.toml @@ -122,11 +122,11 @@ sha2 = "0.10.2" tempfile = "3.10.1" thiserror = "1.0.61" treeline = "0.1.0" -url = "2.2.2, <2.5.1" # Can't use 2.5.1 or newer until new license is reviewed. +url = "2.2.2, <2.5.1" # Can't use 2.5.1 or newer until new license is reviewed. 
uuid = { version = "1.7.0", features = ["serde", "v4", "js"] } x509-parser = "0.15.1" x509-certificate = "0.21.0" -zip = { version = "0.6.6", default-features = false } +zip = { git = "https://github.com/ok-nick/zip2.git", default-features = false } [target.'cfg(not(target_arch = "wasm32"))'.dependencies] ureq = "2.4.0" diff --git a/sdk/src/assertions/collection_hash.rs b/sdk/src/assertions/collection_hash.rs index 595332438..571d755fc 100644 --- a/sdk/src/assertions/collection_hash.rs +++ b/sdk/src/assertions/collection_hash.rs @@ -1,7 +1,7 @@ use std::{ collections::HashMap, fs::{self, File}, - io::{Read, Seek}, + io::{Read, Seek, SeekFrom}, path::{Component, Path, PathBuf}, }; @@ -208,12 +208,13 @@ impl CollectionHash { let zip_central_directory_hash = hash_stream_by_alg( &alg, stream, - Some(vec![zip_central_directory_inclusions]), + Some(vec![zip_central_directory_inclusions.clone()]), false, )?; if zip_central_directory_hash.is_empty() { return Err(Error::BadParam("could not generate data hash".to_string())); } + self.zip_central_directory_hash_range = Some(zip_central_directory_inclusions); self.zip_central_directory_hash = Some(zip_central_directory_hash); self.uris = zip_uri_ranges(stream)?; @@ -392,11 +393,18 @@ pub fn zip_central_directory_range(reader: &mut R) -> Result where R: Read + Seek + ?Sized, { - let _reader = ZipArchive::new(reader).map_err(|_| Error::JumbfNotFound)?; - - // TODO: https://github.com/zip-rs/zip2/issues/209 - - todo!() + let length = reader.seek(SeekFrom::End(0))?; + let reader = ZipArchive::new(reader).map_err(|_| Error::JumbfNotFound)?; + + let start = reader.central_directory_start(); + let length = length - start; + + Ok(HashRange::new( + usize::try_from(start) + .map_err(|_| Error::BadParam(format!("Value {} out of usize range", start)))?, + usize::try_from(length) + .map_err(|_| Error::BadParam(format!("Value {} out of usize range", length)))?, + )) } pub fn zip_uri_ranges(stream: &mut R) -> Result> @@ -455,91 +463,106 @@ 
where Ok(uri_map) } -// TODO: blocked by central_directory_inclusions -// #[cfg(test)] -// mod tests { -// use std::io::Cursor; - -// use super::*; - -// const ZIP_SAMPLE1: &[u8] = include_bytes!("../../tests/fixtures/sample1.zip"); - -// #[test] -// fn test_zip_hash() -> Result<()> { -// let mut stream = Cursor::new(ZIP_SAMPLE1); - -// let mut collection = CollectionHash { -// uris: Vec::new(), -// alg: None, -// zip_central_directory_hash: None, -// base_path: PathBuf::new(), -// zip_central_directory_hash_range: None, -// }; -// collection.gen_hash_from_zip_stream(&mut stream)?; - -// assert_eq!(collection.zip_central_directory_hash, vec![0]); -// assert_eq!( -// collection.zip_central_directory_hash_range, -// Some(HashRange::new(0, 0)) -// ); - -// assert_eq!( -// collection.uris.first(), -// Some(&UriHashedDataMap { -// uri: PathBuf::from("sample1/test1.txt"), -// hash: Some(vec![0]), -// size: Some(47), -// dc_format: None, -// data_types: None, -// zip_hash_range: None, -// }) -// ); -// assert_eq!( -// collection.uris.get(1), -// Some(&UriHashedDataMap { -// uri: PathBuf::from("sample1/test1/test1.txt"), -// hash: Some(vec![0]), -// size: Some(57), -// dc_format: None, -// data_types: None, -// zip_hash_range: None, -// }) -// ); -// assert_eq!( -// collection.uris.get(2), -// Some(&UriHashedDataMap { -// uri: PathBuf::from("sample1/test1/test2.txt"), -// hash: Some(vec![0]), -// size: Some(53), -// dc_format: None, -// data_types: None, -// zip_hash_range: None, -// }) -// ); -// assert_eq!( -// collection.uris.get(3), -// Some(&UriHashedDataMap { -// uri: PathBuf::from("sample1/test1/test3.txt"), -// hash: Some(vec![0]), -// size: Some(68), -// dc_format: None, -// data_types: None, -// zip_hash_range: None, -// }) -// ); -// assert_eq!( -// collection.uris.get(4), -// Some(&UriHashedDataMap { -// uri: PathBuf::from("sample1/test2.txt"), -// hash: Some(vec![0]), -// size: Some(56), -// dc_format: None, -// data_types: None, -// zip_hash_range: None, -// 
}) -// ); -// assert_eq!(collection.uris.len(), 5); - -// Ok(()) -// } -// } +#[cfg(test)] +mod tests { + use std::io::Cursor; + + use super::*; + + const ZIP_SAMPLE1: &[u8] = include_bytes!("../../tests/fixtures/sample1.zip"); + + #[test] + fn test_zip_hash() -> Result<()> { + let mut stream = Cursor::new(ZIP_SAMPLE1); + + let mut collection = CollectionHash { + uris: HashMap::new(), + alg: None, + zip_central_directory_hash: None, + base_path: None, + zip_central_directory_hash_range: None, + }; + collection.gen_hash_from_zip_stream(&mut stream)?; + + assert_eq!( + collection.zip_central_directory_hash, + Some(vec![ + 103, 27, 141, 219, 82, 200, 254, 44, 155, 221, 183, 146, 193, 94, 154, 77, 133, 93, + 148, 88, 160, 123, 224, 170, 61, 140, 13, 2, 153, 86, 225, 231 + ]) + ); + assert_eq!( + collection.zip_central_directory_hash_range, + Some(HashRange::new(369, 727)) + ); + + assert_eq!( + collection.uris.get(Path::new("sample1/test1.txt")), + Some(&UriHashedDataMap { + hash: Some(vec![ + 39, 147, 91, 240, 68, 172, 194, 43, 70, 207, 141, 151, 141, 239, 180, 17, 170, + 106, 248, 168, 169, 245, 207, 172, 29, 204, 80, 155, 37, 30, 186, 60 + ]), + size: Some(47), + dc_format: Some("txt".to_string()), + data_types: None, + zip_hash_range: Some(HashRange::new(44, 47)) + }) + ); + assert_eq!( + collection.uris.get(Path::new("sample1/test1/test1.txt")), + Some(&UriHashedDataMap { + hash: Some(vec![ + 136, 103, 106, 251, 180, 19, 60, 244, 42, 171, 44, 215, 65, 252, 59, 127, 84, + 63, 175, 25, 6, 118, 200, 12, 188, 128, 67, 78, 249, 182, 242, 156 + ]), + size: Some(57), + dc_format: Some("txt".to_string()), + data_types: None, + zip_hash_range: Some(HashRange::new(91, 57)) + }) + ); + assert_eq!( + collection.uris.get(Path::new("sample1/test1/test2.txt")), + Some(&UriHashedDataMap { + hash: Some(vec![ + 164, 100, 0, 41, 229, 201, 3, 228, 30, 254, 72, 205, 60, 70, 104, 78, 121, 21, + 187, 230, 19, 242, 52, 212, 181, 104, 99, 179, 177, 81, 150, 33 + ]), + size: Some(53), + 
dc_format: Some("txt".to_string()), + data_types: None, + zip_hash_range: Some(HashRange::new(148, 53)) + }) + ); + assert_eq!( + collection.uris.get(Path::new("sample1/test1/test3.txt")), + Some(&UriHashedDataMap { + hash: Some(vec![ + 129, 96, 58, 105, 119, 67, 2, 71, 77, 151, 99, 201, 192, 32, 213, 77, 19, 22, + 106, 204, 158, 142, 176, 247, 251, 174, 145, 243, 12, 22, 151, 116 + ]), + size: Some(68), + dc_format: Some("txt".to_string()), + data_types: None, + zip_hash_range: Some(HashRange::new(201, 68)) + }) + ); + assert_eq!( + collection.uris.get(Path::new("sample1/test2.txt")), + Some(&UriHashedDataMap { + hash: Some(vec![ + 118, 254, 231, 173, 246, 184, 45, 104, 69, 72, 23, 21, 177, 202, 184, 241, 162, + 36, 28, 55, 23, 62, 109, 143, 182, 233, 99, 144, 23, 139, 9, 118 + ]), + size: Some(56), + dc_format: Some("txt".to_string()), + data_types: None, + zip_hash_range: Some(HashRange::new(313, 56)) + }) + ); + assert_eq!(collection.uris.len(), 5); + + Ok(()) + } +} From c2feb82dd7b2c7c0325ab20ee4d9fa76ccb5c637 Mon Sep 17 00:00:00 2001 From: ok-nick Date: Thu, 8 Aug 2024 12:20:23 -0400 Subject: [PATCH 19/21] Fix thiserror dependency conflict --- sdk/Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sdk/Cargo.toml b/sdk/Cargo.toml index 240d4119f..18a1b62ac 100644 --- a/sdk/Cargo.toml +++ b/sdk/Cargo.toml @@ -120,7 +120,7 @@ serde_with = "3.4.0" serde-transcode = "1.1.1" sha2 = "0.10.2" tempfile = "3.10.1" -thiserror = "1.0.61" +thiserror = "1.0.63" treeline = "0.1.0" url = "2.2.2, <2.5.1" # Can't use 2.5.1 or newer until new license is reviewed. 
uuid = { version = "1.7.0", features = ["serde", "v4", "js"] } From 97ebd569dbeacfd59fecfa67ac58eb0e02324a11 Mon Sep 17 00:00:00 2001 From: ok-nick Date: Thu, 29 Aug 2024 13:27:31 -0400 Subject: [PATCH 20/21] Use latest zip crate (with fix) --- sdk/Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sdk/Cargo.toml b/sdk/Cargo.toml index 18a1b62ac..3f57ab772 100644 --- a/sdk/Cargo.toml +++ b/sdk/Cargo.toml @@ -126,7 +126,7 @@ url = "2.2.2, <2.5.1" # Can't use 2.5.1 or newer until new license is reviewed. uuid = { version = "1.7.0", features = ["serde", "v4", "js"] } x509-parser = "0.15.1" x509-certificate = "0.21.0" -zip = { git = "https://github.com/ok-nick/zip2.git", default-features = false } +zip = "2.2.0" [target.'cfg(not(target_arch = "wasm32"))'.dependencies] ureq = "2.4.0" From a5d05335856f1d8724353df11b569f22e91424e6 Mon Sep 17 00:00:00 2001 From: ok-nick Date: Thu, 29 Aug 2024 13:41:23 -0400 Subject: [PATCH 21/21] Update log crate to fix dependency conflict --- make_test_images/Cargo.toml | 2 +- sdk/Cargo.toml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/make_test_images/Cargo.toml b/make_test_images/Cargo.toml index e9cf79940..1e1d569e5 100644 --- a/make_test_images/Cargo.toml +++ b/make_test_images/Cargo.toml @@ -15,7 +15,7 @@ c2pa = { path = "../sdk", default-features = false, features = [ "file_io", ] } env_logger = "0.11" -log = "0.4.8" +log = "0.4.22" image = { version = "0.25.2", default-features = false, features = [ "jpeg", "png", diff --git a/sdk/Cargo.toml b/sdk/Cargo.toml index 72213dd73..0fdebfdec 100644 --- a/sdk/Cargo.toml +++ b/sdk/Cargo.toml @@ -95,7 +95,7 @@ hex = "0.4.3" id3 = "=1.12.0" img-parts = "0.3.0" jfifdump = "0.5.1" -log = "0.4.8" +log = "0.4.22" lopdf = { version = "0.31.0", optional = true } lazy_static = "1.4.0" memchr = "2.7.4"