diff --git a/make_test_images/Cargo.toml b/make_test_images/Cargo.toml
index a5556c251..1e1d569e5 100644
--- a/make_test_images/Cargo.toml
+++ b/make_test_images/Cargo.toml
@@ -15,12 +15,12 @@ c2pa = { path = "../sdk", default-features = false, features = [
     "file_io",
 ] }
 env_logger = "0.11"
-log = "0.4.8"
+log = "0.4.22"
 image = { version = "0.25.2", default-features = false, features = [
     "jpeg",
     "png",
 ] }
-memchr = "2.7.1"
+memchr = "2.7.4"
 nom = "7.1.3"
 regex = "1.5.6"
 serde = "1.0.197"
diff --git a/sdk/Cargo.toml b/sdk/Cargo.toml
index a9b3d3b1b..0fdebfdec 100644
--- a/sdk/Cargo.toml
+++ b/sdk/Cargo.toml
@@ -95,10 +95,10 @@ hex = "0.4.3"
 id3 = "=1.12.0"
 img-parts = "0.3.0"
 jfifdump = "0.5.1"
-log = "0.4.8"
+log = "0.4.22"
 lopdf = { version = "0.31.0", optional = true }
 lazy_static = "1.4.0"
-memchr = "2.7.1"
+memchr = "2.7.4"
 multibase = "0.9.0"
 multihash = "0.11.4"
 mp4 = "0.13.0"
@@ -121,13 +121,13 @@ serde_with = "3.4.0"
 serde-transcode = "1.1.1"
 sha2 = "0.10.6"
 tempfile = "3.10.1"
-thiserror = "1.0.61"
+thiserror = "1.0.63"
 treeline = "0.1.0"
-url = "2.2.2, <2.5.1" # Can't use 2.5.1 or newer until new license is reviewed.
+url = "2.2.2, <2.5.1"  # Can't use 2.5.1 or newer until new license is reviewed.
 uuid = { version = "1.7.0", features = ["serde", "v4", "js"] }
 x509-parser = "0.15.1"
 x509-certificate = "0.21.0"
-zip = { version = "0.6.6", default-features = false }
+zip = "2.2.0"
 
 [target.'cfg(not(target_arch = "wasm32"))'.dependencies]
diff --git a/sdk/src/assertions/collection_hash.rs b/sdk/src/assertions/collection_hash.rs
new file mode 100644
index 000000000..571d755fc
--- /dev/null
+++ b/sdk/src/assertions/collection_hash.rs
@@ -0,0 +1,568 @@
+use std::{
+    collections::HashMap,
+    fs::{self, File},
+    io::{Read, Seek, SeekFrom},
+    path::{Component, Path, PathBuf},
+};
+
+use serde::{Deserialize, Serialize};
+use zip::ZipArchive;
+
+use crate::{
+    assertion::{Assertion, AssertionBase, AssertionCbor},
+    assertions::{labels::COLLECTION_HASH, AssetType},
+    hash_stream_by_alg,
+    hash_utils::verify_stream_by_alg,
+    Error, HashRange, Result,
+};
+
+const ASSERTION_CREATION_VERSION: usize = 1;
+
+/// A collection hash is used to hash multiple files within a collection (e.g. a folder or a zip file).
+#[derive(Serialize, Deserialize, Debug, PartialEq, Eq)]
+pub struct CollectionHash {
+    // We use a hash map to avoid potential duplicates.
+    //
+    /// Map of file paths to their metadata for the collection.
+    pub uris: HashMap<PathBuf, UriHashedDataMap>,
+
+    /// Algorithm used to hash the files.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub alg: Option<String>,
+
+    // TODO: in c2patool, we need to redefine this field to also handle relative paths.
+    //
+    /// This field represents the root directory that files must be contained within. If the path is a file, it
+    /// will default to using the file's parent. For more information, read [`CollectionHash::new`][CollectionHash::new].
+    ///
+    /// While this field is marked as optional (it is not serialized as part of the spec), it is required for computing
+    /// hashes and MUST be specified.
+    #[serde(skip_serializing)]
+    pub base_path: Option<PathBuf>,
+
+    /// Hash of the ZIP central directory.
+    ///
+    /// This field only needs to be specified if the collection hash is for a ZIP file.
+    #[serde(with = "serde_bytes", skip_serializing_if = "Option::is_none")]
+    pub zip_central_directory_hash: Option<Vec<u8>>,
+
+    #[serde(skip)]
+    zip_central_directory_hash_range: Option<HashRange>,
+}
+
+/// Information about a file in a [`CollectionHash`][CollectionHash].
+#[derive(Serialize, Deserialize, Debug, PartialEq, Eq)]
+pub struct UriHashedDataMap {
+    /// Hash of the entire file contents.
+    ///
+    /// For a ZIP, the hash must span starting from the file header to the end of the compressed file data.
+    #[serde(with = "serde_bytes", skip_serializing_if = "Option::is_none")]
+    pub hash: Option<Vec<u8>>,
+
+    /// Size of the file in the collection.
+    ///
+    /// For a ZIP, the size must span from the file header to the end of the compressed file data.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub size: Option<u64>,
+
+    /// Mime type of the file.
+    ///
+    /// Note that this field is specified as `dc:format` during serialization/deserialization.
+    #[serde(rename = "dc:format", skip_serializing_if = "Option::is_none")]
+    pub dc_format: Option<String>,
+
+    /// Additional information about the type of data in the file.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub data_types: Option<Vec<AssetType>>,
+
+    #[serde(skip)]
+    zip_hash_range: Option<HashRange>,
+}
+
+impl CollectionHash {
+    pub const LABEL: &'static str = COLLECTION_HASH;
+
+    /// Create a new collection hash with the specified base path.
+    ///
+    /// A base path means that any path added to the collection will use the base path as the root. If the
+    /// added path is outside the scope of the base path, hashing will immediately result in an error.
+    ///
+    /// The base path may either be a file or a directory. However, if it is a file, it will use the parent
+    /// directory as the root.
+    pub fn new(base_path: PathBuf) -> Result<Self> {
+        Self::new_raw(base_path, None)
+    }
+
+    /// Create a new collection hash with the specified algorithm.
+    ///
+    /// For more details on base_path, read [`CollectionHash::new`][CollectionHash::new].
+    pub fn with_alg(base_path: PathBuf, alg: String) -> Result<Self> {
+        Self::new_raw(base_path, Some(alg))
+    }
+
+    /// Adds a new file to the collection hash.
+    ///
+    /// Note that the specified path MUST be a file, not a directory. It must also be within the scope of the
+    /// base_path. Read more on base_path in [`CollectionHash::new`][CollectionHash::new].
+    pub fn add_file(&mut self, path: PathBuf) -> Result<()> {
+        self.add_file_raw(path, None)
+    }
+
+    /// Add a file with the specified data types.
+    ///
+    /// Read more on the constraints of these parameters in [`CollectionHash::add_file`][CollectionHash::add_file].
+    pub fn add_file_with_data_types(
+        &mut self,
+        path: PathBuf,
+        data_types: Vec<AssetType>,
+    ) -> Result<()> {
+        self.add_file_raw(path, Some(data_types))
+    }
+
+    /// Generate the hashes for the files in the collection.
+    pub fn gen_hash<R>(&mut self) -> Result<()>
+    where
+        R: Read + Seek + ?Sized,
+    {
+        let alg = self.alg().to_owned();
+        let base_path = self.base_path()?.to_owned();
+
+        for (path, uri_map) in &mut self.uris {
+            let path = base_path.join(path);
+            Self::validate_path(&path)?;
+
+            let mut file = File::open(&path)?;
+            let file_len = match uri_map.size {
+                Some(file_len) => file_len,
+                None => file.metadata()?.len(),
+            };
+            uri_map.hash = Some(hash_stream_by_alg(
+                &alg,
+                &mut file,
+                Some(vec![HashRange::new(
+                    0,
+                    usize::try_from(file_len).map_err(|_| {
+                        Error::BadParam(format!("Value {} out of usize range", file_len))
+                    })?,
+                )]),
+                false,
+            )?);
+        }
+
+        Ok(())
+    }
+
+    /// Validate the hashes for the files in the collection.
+    pub fn verify_hash<R>(&self, alg: Option<&str>) -> Result<()>
+    where
+        R: Read + Seek + ?Sized,
+    {
+        let alg = alg.unwrap_or_else(|| self.alg());
+        let base_path = self.base_path()?;
+
+        for (path, uri_map) in &self.uris {
+            let path = base_path.join(path);
+            Self::validate_path(&path)?;
+
+            let mut file = File::open(&path)?;
+            let file_len = file.metadata()?.len();
+
+            match &uri_map.hash {
+                Some(hash) => {
+                    if !verify_stream_by_alg(
+                        alg,
+                        hash,
+                        &mut file,
+                        Some(vec![HashRange::new(
+                            0,
+                            usize::try_from(file_len).map_err(|_| {
+                                Error::BadParam(format!("Value {} out of usize range", file_len))
+                            })?,
+                        )]),
+                        false,
+                    ) {
+                        return Err(Error::HashMismatch(format!(
+                            "hash for {} does not match",
+                            path.display()
+                        )));
+                    }
+                }
+                None => {
+                    return Err(Error::BadParam(
+                        "Must generate hashes before verifying".to_owned(),
+                    ));
+                }
+            }
+        }
+
+        Ok(())
+    }
+
+    pub fn gen_hash_from_zip_stream<R>(&mut self, stream: &mut R) -> Result<()>
+    where
+        R: Read + Seek + ?Sized,
+    {
+        let alg = self.alg().to_owned();
+
+        let zip_central_directory_inclusions = zip_central_directory_range(stream)?;
+        let zip_central_directory_hash = hash_stream_by_alg(
+            &alg,
+            stream,
+            Some(vec![zip_central_directory_inclusions.clone()]),
+            false,
+        )?;
+        if zip_central_directory_hash.is_empty() {
+            return Err(Error::BadParam("could not generate data hash".to_string()));
+        }
+        self.zip_central_directory_hash_range = Some(zip_central_directory_inclusions);
+        self.zip_central_directory_hash = Some(zip_central_directory_hash);
+
+        self.uris = zip_uri_ranges(stream)?;
+        for uri_map in self.uris.values_mut() {
+            let hash = hash_stream_by_alg(
+                &alg,
+                stream,
+                // We always generate the zip_hash_range in zip_uri_ranges.
+                #[allow(clippy::unwrap_used)]
+                Some(vec![uri_map.zip_hash_range.clone().unwrap()]),
+                false,
+            )?;
+            if hash.is_empty() {
+                return Err(Error::BadParam("could not generate data hash".to_string()));
+            }
+
+            uri_map.hash = Some(hash);
+        }
+
+        Ok(())
+    }
+
+    pub fn verify_zip_stream_hash<R>(&self, stream: &mut R, alg: Option<&str>) -> Result<()>
+    where
+        R: Read + Seek + ?Sized,
+    {
+        let alg = alg.unwrap_or_else(|| self.alg());
+        let zip_central_directory_hash = match &self.zip_central_directory_hash {
+            Some(hash) => Ok(hash),
+            None => Err(Error::BadParam(
+                "Missing zip central directory hash".to_owned(),
+            )),
+        }?;
+        if !verify_stream_by_alg(
+            alg,
+            zip_central_directory_hash,
+            stream,
+            // If zip_central_directory_hash exists (we checked above), then this must exist.
+            #[allow(clippy::unwrap_used)]
+            Some(vec![self.zip_central_directory_hash_range.clone().unwrap()]),
+            false,
+        ) {
+            return Err(Error::HashMismatch(
+                "Hashes do not match for zip central directory".to_owned(),
+            ));
+        }
+
+        for (path, uri_map) in &self.uris {
+            match &uri_map.hash {
+                Some(hash) => {
+                    if !verify_stream_by_alg(
+                        alg,
+                        hash,
+                        stream,
+                        // Same reason as above.
+                        #[allow(clippy::unwrap_used)]
+                        Some(vec![uri_map.zip_hash_range.clone().unwrap()]),
+                        false,
+                    ) {
+                        return Err(Error::HashMismatch(format!(
+                            "hash for {} does not match",
+                            path.display()
+                        )));
+                    }
+                }
+                None => {
+                    return Err(Error::BadParam(
+                        "Must generate hashes before verifying".to_owned(),
+                    ));
+                }
+            }
+        }
+
+        Ok(())
+    }
+
+    fn new_raw(base_path: PathBuf, alg: Option<String>) -> Result<Self> {
+        Ok(Self {
+            uris: HashMap::new(),
+            alg,
+            base_path: Some(base_path),
+            zip_central_directory_hash: None,
+            zip_central_directory_hash_range: None,
+        })
+    }
+
+    fn add_file_raw(&mut self, path: PathBuf, data_types: Option<Vec<AssetType>>) -> Result<()> {
+        Self::validate_path(&path)?;
+
+        let format = crate::format_from_path(&path);
+        let metadata = fs::metadata(&path)?;
+        self.uris.insert(
+            path,
+            UriHashedDataMap {
+                hash: None,
+                size: Some(metadata.len()),
+                dc_format: format,
+                data_types,
+                zip_hash_range: None,
+            },
+        );
+
+        Ok(())
+    }
+
+    fn alg(&self) -> &str {
+        self.alg.as_deref().unwrap_or("sha256")
+    }
+
+    fn base_path(&self) -> Result<&Path> {
+        match &self.base_path {
+            Some(base_path) => match base_path.is_file() {
+                true => match base_path.parent() {
+                    Some(path) => Ok(path),
+                    None => Err(Error::BadParam(
+                        "Base path must be a directory or a file with a parent directory"
+                            .to_owned(),
+                    )),
+                },
+                false => Ok(base_path),
+            },
+            None => Err(Error::BadParam(
+                "Must specify base path for collection hash".to_owned(),
+            )),
+        }
+    }
+
+    fn validate_path(path: &Path) -> Result<()> {
+        if !path.is_file() {
+            return Err(Error::BadParam(format!(
+                "Collection hashes must only contain files; got `{}`",
+                path.display()
+            )));
+        }
+
+        for component in path.components() {
+            match component {
+                Component::CurDir | Component::ParentDir => {
+                    return Err(Error::BadParam(format!(
+                        "URI `{}` must not contain relative components: `.` nor `..`",
+                        path.display()
+                    )));
+                }
+                _ => {}
+            }
+        }
+
+        Ok(())
+    }
+}
+
+impl AssertionBase for CollectionHash {
+    const LABEL: &'static str = Self::LABEL;
+    const VERSION: Option<usize> = Some(ASSERTION_CREATION_VERSION);
+
+    fn from_assertion(assertion: &Assertion) -> Result<Self> {
+        Self::from_cbor_assertion(assertion)
+    }
+
+    // We don't need to check if the zip_central_directory_hash exists, because if it is a zip
+    // and one of the uri map hashes doesn't exist, then that means the central dir hash doesn't exist.
+    fn to_assertion(&self) -> Result<Assertion> {
+        if self.uris.iter().any(|(_, uri_map)| uri_map.hash.is_none()) {
+            return Err(Error::BadParam(
+                "No hash found, ensure gen_hash is called".to_string(),
+            ));
+        }
+
+        Self::to_cbor_assertion(self)
+    }
+}
+
+impl AssertionCbor for CollectionHash {}
+
+pub fn zip_central_directory_range<R>(reader: &mut R) -> Result<HashRange>
+where
+    R: Read + Seek + ?Sized,
+{
+    let length = reader.seek(SeekFrom::End(0))?;
+    let reader = ZipArchive::new(reader).map_err(|_| Error::JumbfNotFound)?;
+
+    let start = reader.central_directory_start();
+    let length = length - start;
+
+    Ok(HashRange::new(
+        usize::try_from(start)
+            .map_err(|_| Error::BadParam(format!("Value {} out of usize range", start)))?,
+        usize::try_from(length)
+            .map_err(|_| Error::BadParam(format!("Value {} out of usize range", length)))?,
+    ))
+}
+
+pub fn zip_uri_ranges<R>(stream: &mut R) -> Result<HashMap<PathBuf, UriHashedDataMap>>
+where
+    R: Read + Seek + ?Sized,
+{
+    let mut reader = ZipArchive::new(stream).map_err(|_| Error::JumbfNotFound)?;
+
+    let mut uri_map = HashMap::new();
+    let file_names: Vec<String> = reader.file_names().map(|n| n.to_owned()).collect();
+    for file_name in file_names {
+        let file = reader
+            .by_name(&file_name)
+            .map_err(|_| Error::JumbfNotFound)?;
+
+        if !file.is_dir() {
+            match file.enclosed_name() {
+                Some(path) => {
+                    if path != Path::new("META-INF/content_credential.c2pa") {
+                        let start = file.header_start();
+                        let len =
+                            (file.data_start() + file.compressed_size()) - file.header_start();
+                        let format = crate::format_from_path(&path);
+                        uri_map.insert(
+                            path,
+                            UriHashedDataMap {
+                                hash: Some(Vec::new()),
+                                size: Some(len),
+                                dc_format: format,
+                                data_types: None,
+                                zip_hash_range: Some(HashRange::new(
+                                    usize::try_from(start).map_err(|_| {
+                                        Error::BadParam(format!(
+                                            "Value {} out of usize range",
+                                            start
+                                        ))
+                                    })?,
+                                    usize::try_from(len).map_err(|_| {
+                                        Error::BadParam(format!("Value {} out of usize range", len))
+                                    })?,
+                                )),
+                            },
+                        );
+                    }
+                }
+                None => {
+                    return Err(Error::BadParam(format!(
+                        "Invalid stored path `{}` in zip file",
+                        file_name
+                    )))
+                }
+            }
+        }
+    }
+
+    Ok(uri_map)
+}
+
+#[cfg(test)]
+mod tests {
+    use std::io::Cursor;
+
+    use super::*;
+
+    const ZIP_SAMPLE1: &[u8] = include_bytes!("../../tests/fixtures/sample1.zip");
+
+    #[test]
+    fn test_zip_hash() -> Result<()> {
+        let mut stream = Cursor::new(ZIP_SAMPLE1);
+
+        let mut collection = CollectionHash {
+            uris: HashMap::new(),
+            alg: None,
+            zip_central_directory_hash: None,
+            base_path: None,
+            zip_central_directory_hash_range: None,
+        };
+        collection.gen_hash_from_zip_stream(&mut stream)?;
+
+        assert_eq!(
+            collection.zip_central_directory_hash,
+            Some(vec![
+                103, 27, 141, 219, 82, 200, 254, 44, 155, 221, 183, 146, 193, 94, 154, 77, 133, 93,
+                148, 88, 160, 123, 224, 170, 61, 140, 13, 2, 153, 86, 225, 231
+            ])
+        );
+        assert_eq!(
+            collection.zip_central_directory_hash_range,
+            Some(HashRange::new(369, 727))
+        );
+
+        assert_eq!(
+            collection.uris.get(Path::new("sample1/test1.txt")),
+            Some(&UriHashedDataMap {
+                hash: Some(vec![
+                    39, 147, 91, 240, 68, 172, 194, 43, 70, 207, 141, 151, 141, 239, 180, 17, 170,
+                    106, 248, 168, 169, 245, 207, 172, 29, 204, 80, 155, 37, 30, 186, 60
+                ]),
+                size: Some(47),
+                dc_format: Some("txt".to_string()),
+                data_types: None,
+                zip_hash_range: Some(HashRange::new(44, 47))
+            })
+        );
+        assert_eq!(
+            collection.uris.get(Path::new("sample1/test1/test1.txt")),
+            Some(&UriHashedDataMap {
+                hash: Some(vec![
+                    136, 103, 106, 251, 180, 19, 60, 244, 42, 171, 44, 215, 65, 252, 59, 127, 84,
+                    63, 175, 25, 6, 118, 200, 12, 188,
+                    128, 67, 78, 249, 182, 242, 156
+                ]),
+                size: Some(57),
+                dc_format: Some("txt".to_string()),
+                data_types: None,
+                zip_hash_range: Some(HashRange::new(91, 57))
+            })
+        );
+        assert_eq!(
+            collection.uris.get(Path::new("sample1/test1/test2.txt")),
+            Some(&UriHashedDataMap {
+                hash: Some(vec![
+                    164, 100, 0, 41, 229, 201, 3, 228, 30, 254, 72, 205, 60, 70, 104, 78, 121, 21,
+                    187, 230, 19, 242, 52, 212, 181, 104, 99, 179, 177, 81, 150, 33
+                ]),
+                size: Some(53),
+                dc_format: Some("txt".to_string()),
+                data_types: None,
+                zip_hash_range: Some(HashRange::new(148, 53))
+            })
+        );
+        assert_eq!(
+            collection.uris.get(Path::new("sample1/test1/test3.txt")),
+            Some(&UriHashedDataMap {
+                hash: Some(vec![
+                    129, 96, 58, 105, 119, 67, 2, 71, 77, 151, 99, 201, 192, 32, 213, 77, 19, 22,
+                    106, 204, 158, 142, 176, 247, 251, 174, 145, 243, 12, 22, 151, 116
+                ]),
+                size: Some(68),
+                dc_format: Some("txt".to_string()),
+                data_types: None,
+                zip_hash_range: Some(HashRange::new(201, 68))
+            })
+        );
+        assert_eq!(
+            collection.uris.get(Path::new("sample1/test2.txt")),
+            Some(&UriHashedDataMap {
+                hash: Some(vec![
+                    118, 254, 231, 173, 246, 184, 45, 104, 69, 72, 23, 21, 177, 202, 184, 241, 162,
+                    36, 28, 55, 23, 62, 109, 143, 182, 233, 99, 144, 23, 139, 9, 118
+                ]),
+                size: Some(56),
+                dc_format: Some("txt".to_string()),
+                data_types: None,
+                zip_hash_range: Some(HashRange::new(313, 56))
+            })
+        );
+        assert_eq!(collection.uris.len(), 5);
+
+        Ok(())
+    }
+}
diff --git a/sdk/src/assertions/labels.rs b/sdk/src/assertions/labels.rs
index c4c5990cb..c11b8cee8 100644
--- a/sdk/src/assertions/labels.rs
+++ b/sdk/src/assertions/labels.rs
@@ -39,6 +39,11 @@ pub const DATA_HASH: &str = "c2pa.hash.data";
 /// See .
 pub const BOX_HASH: &str = "c2pa.hash.boxes";
 
+/// Label prefix for a collection hash assertion.
+///
+/// See .
+pub const COLLECTION_HASH: &str = "c2pa.hash.collection.data";
+
 /// Label prefix for a BMFF-based hash assertion.
 ///
 /// See .
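A quick usage sketch of the zip-stream flow introduced in `collection_hash.rs` above. The function name and file path are hypothetical, and the base path is a placeholder, since only the directory-based `gen_hash`/`verify_hash` flow consults it:

```rust
use std::{fs::File, path::PathBuf};

use c2pa::{assertions::CollectionHash, Result};

fn verify_zip_collection(path: &str) -> Result<()> {
    let mut stream = File::open(path)?;

    // Only the directory-based flow reads base_path, so a placeholder is fine here.
    let mut collection = CollectionHash::new(PathBuf::from("placeholder"))?;

    // Hash the central directory and every stored file (zip_uri_ranges skips the
    // embedded C2PA manifest), then check the same stream against those hashes.
    collection.gen_hash_from_zip_stream(&mut stream)?;
    collection.verify_zip_stream_hash(&mut stream, None)?;

    Ok(())
}
```

With no `alg` set, both calls fall back to `sha256`, matching the `alg()` default in the assertion.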
diff --git a/sdk/src/assertions/mod.rs b/sdk/src/assertions/mod.rs
index 570e28ad9..6dac4efb7 100644
--- a/sdk/src/assertions/mod.rs
+++ b/sdk/src/assertions/mod.rs
@@ -25,6 +25,9 @@ pub use box_hash::{BoxHash, BoxMap, C2PA_BOXHASH};
 mod data_hash;
 pub use data_hash::DataHash;
 
+mod collection_hash;
+pub use collection_hash::{CollectionHash, UriHashedDataMap};
+
 mod creative_work;
 pub use creative_work::CreativeWork;
 
diff --git a/sdk/src/asset_handlers/mod.rs b/sdk/src/asset_handlers/mod.rs
index 19dba1703..9323b17c1 100644
--- a/sdk/src/asset_handlers/mod.rs
+++ b/sdk/src/asset_handlers/mod.rs
@@ -20,6 +20,7 @@ pub mod png_io;
 pub mod riff_io;
 pub mod svg_io;
 pub mod tiff_io;
+pub mod zip_io;
 
 #[cfg(feature = "pdf")]
 pub(crate) mod pdf;
diff --git a/sdk/src/asset_handlers/zip_io.rs b/sdk/src/asset_handlers/zip_io.rs
new file mode 100644
index 000000000..9e24b0ba3
--- /dev/null
+++ b/sdk/src/asset_handlers/zip_io.rs
@@ -0,0 +1,321 @@
+use std::{
+    fs::{self, File},
+    io::{self, Read},
+    path::Path,
+};
+
+use tempfile::Builder;
+use zip::{
+    result::{ZipError, ZipResult},
+    write::SimpleFileOptions,
+    CompressionMethod, ZipArchive, ZipWriter,
+};
+
+use crate::{
+    asset_io::{
+        self, AssetIO, CAIReadWrapper, CAIReadWriteWrapper, CAIReader, CAIWriter,
+        HashObjectPositions,
+    },
+    error::Result,
+    CAIRead, CAIReadWrite, Error,
+};
+
+pub struct ZipIO {}
+
+impl CAIWriter for ZipIO {
+    fn write_cai(
+        &self,
+        input_stream: &mut dyn CAIRead,
+        output_stream: &mut dyn CAIReadWrite,
+        mut store_bytes: &[u8],
+    ) -> Result<()> {
+        let mut writer = self
+            .writer(input_stream, output_stream)
+            .map_err(|_| Error::EmbeddingError)?;
+
+        match writer.add_directory("META-INF", SimpleFileOptions::default()) {
+            Err(ZipError::InvalidArchive("Duplicate filename")) => {}
+            Err(_) => return Err(Error::EmbeddingError),
+            _ => {}
+        }
+
+        match writer.start_file_from_path(
+            Path::new("META-INF/content_credential.c2pa"),
+            SimpleFileOptions::default().compression_method(CompressionMethod::Stored),
+        ) {
+            Err(ZipError::InvalidArchive("Duplicate filename")) => {
+                writer.abort_file().map_err(|_| Error::EmbeddingError)?;
+                // TODO: remove code duplication
+                writer
+                    .start_file_from_path(
+                        Path::new("META-INF/content_credential.c2pa"),
+                        SimpleFileOptions::default().compression_method(CompressionMethod::Stored),
+                    )
+                    .map_err(|_| Error::EmbeddingError)?;
+            }
+            Err(_) => return Err(Error::EmbeddingError),
+            _ => {}
+        }
+
+        io::copy(&mut store_bytes, &mut writer)?;
+        writer.finish().map_err(|_| Error::EmbeddingError)?;
+
+        Ok(())
+    }
+
+    fn get_object_locations_from_stream(
+        &self,
+        _input_stream: &mut dyn CAIRead,
+    ) -> Result<Vec<HashObjectPositions>> {
+        // TODO: error?
+        Ok(Vec::new())
+    }
+
+    fn remove_cai_store_from_stream(
+        &self,
+        input_stream: &mut dyn CAIRead,
+        output_stream: &mut dyn CAIReadWrite,
+    ) -> Result<()> {
+        let mut writer = self
+            .writer(input_stream, output_stream)
+            .map_err(|_| Error::EmbeddingError)?;
+
+        match writer.start_file_from_path(
+            Path::new("META-INF/content_credential.c2pa"),
+            SimpleFileOptions::default(),
+        ) {
+            Err(ZipError::InvalidArchive("Duplicate filename")) => {}
+            Err(_) => return Err(Error::EmbeddingError),
+            _ => {}
+        }
+        writer.abort_file().map_err(|_| Error::EmbeddingError)?;
+        writer.finish().map_err(|_| Error::EmbeddingError)?;
+
+        Ok(())
+    }
+}
+
+impl CAIReader for ZipIO {
+    fn read_cai(&self, asset_reader: &mut dyn CAIRead) -> Result<Vec<u8>> {
+        let mut reader = self
+            .reader(asset_reader)
+            .map_err(|_| Error::JumbfNotFound)?;
+
+        let index = reader
+            .index_for_path(Path::new("META-INF/content_credential.c2pa"))
+            .ok_or(Error::JumbfNotFound)?;
+        let mut file = reader.by_index(index).map_err(|_| Error::JumbfNotFound)?;
+
+        let mut bytes = Vec::new();
+        file.read_to_end(&mut bytes)?;
+
+        Ok(bytes)
+    }
+
+    fn read_xmp(&self, _asset_reader: &mut dyn CAIRead) -> Option<String> {
+        None
+    }
+}
+
+impl AssetIO for ZipIO {
+    fn new(_asset_type: &str) -> Self
+    where
+        Self: Sized,
+    {
+        ZipIO {}
+    }
+
+    fn get_handler(&self, asset_type: &str) -> Box<dyn AssetIO> {
+        Box::new(ZipIO::new(asset_type))
+    }
+
+    fn get_reader(&self) -> &dyn CAIReader {
+        self
+    }
+
+    fn get_writer(&self, asset_type: &str) -> Option<Box<dyn CAIWriter>> {
+        Some(Box::new(ZipIO::new(asset_type)))
+    }
+
+    fn read_cai_store(&self, asset_path: &Path) -> Result<Vec<u8>> {
+        let mut f = File::open(asset_path)?;
+        self.read_cai(&mut f)
+    }
+
+    fn save_cai_store(&self, asset_path: &Path, store_bytes: &[u8]) -> Result<()> {
+        let mut stream = fs::OpenOptions::new()
+            .read(true)
+            .open(asset_path)
+            .map_err(Error::IoError)?;
+
+        let mut temp_file = Builder::new()
+            .prefix("c2pa_temp")
+            .rand_bytes(5)
+            .tempfile()?;
+
+        self.write_cai(&mut stream, &mut temp_file, store_bytes)?;
+
+        asset_io::rename_or_move(temp_file, asset_path)
+    }
+
+    fn get_object_locations(&self, asset_path: &Path) -> Result<Vec<HashObjectPositions>> {
+        let mut f = std::fs::File::open(asset_path).map_err(|_err| Error::EmbeddingError)?;
+        self.get_object_locations_from_stream(&mut f)
+    }
+
+    fn remove_cai_store(&self, asset_path: &Path) -> Result<()> {
+        let mut stream = fs::OpenOptions::new()
+            .read(true)
+            .open(asset_path)
+            .map_err(Error::IoError)?;
+
+        let mut temp_file = Builder::new()
+            .prefix("c2pa_temp")
+            .rand_bytes(5)
+            .tempfile()?;
+
+        self.remove_cai_store_from_stream(&mut stream, &mut temp_file)?;
+
+        asset_io::rename_or_move(temp_file, asset_path)
+    }
+
+    fn supported_types(&self) -> &[&str] {
+        &[
+            // Zip
+            "zip",
+            "application/x-zip",
+            // EPUB
+            "epub",
+            "application/epub+zip",
+            // Office Open XML
+            "docx",
+            "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
+            "xlsx",
+            "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
+            "pptx",
+            "application/vnd.openxmlformats-officedocument.presentationml.presentation",
+            "docm",
+            "application/vnd.ms-word.document.macroenabled.12",
+            "xlsm",
+            "application/vnd.ms-excel.sheet.macroenabled.12",
+            "pptm",
+            "application/vnd.ms-powerpoint.presentation.macroenabled.12",
+            // Open Document
+            "odt",
+            "application/vnd.oasis.opendocument.text",
+            "ods",
+            "application/vnd.oasis.opendocument.spreadsheet",
+            "odp",
+            "application/vnd.oasis.opendocument.presentation",
+            "odg",
+            "application/vnd.oasis.opendocument.graphics",
+            "ott",
"application/vnd.oasis.opendocument.text-template", + "ots", + "application/vnd.oasis.opendocument.spreadsheet-template", + "otp", + "application/vnd.oasis.opendocument.presentation-template", + "otg", + "application/vnd.oasis.opendocument.graphics-template", + // OpenXPS + "oxps", + "application/oxps", + ] + } +} + +impl ZipIO { + fn reader<'a>( + &self, + input_stream: &'a mut dyn CAIRead, + ) -> ZipResult>> { + ZipArchive::new(CAIReadWrapper { + reader: input_stream, + }) + } + + fn writer<'a>( + &self, + input_stream: &'a mut dyn CAIRead, + output_stream: &'a mut dyn CAIReadWrite, + ) -> ZipResult>> { + input_stream.rewind()?; + io::copy(input_stream, output_stream)?; + + ZipWriter::new_append(CAIReadWriteWrapper { + reader_writer: output_stream, + }) + } +} + +#[cfg(test)] +mod tests { + use io::{Cursor, Seek}; + + use super::*; + + // TODO: add more sample file types + const SAMPLES: [&[u8]; 3] = [ + include_bytes!("../../tests/fixtures/sample1.zip"), + include_bytes!("../../tests/fixtures/sample1.docx"), + include_bytes!("../../tests/fixtures/sample1.odt"), + ]; + + #[test] + fn test_write_bytes() -> Result<()> { + for sample in SAMPLES { + let mut stream = Cursor::new(sample); + + let zip_io = ZipIO {}; + + assert!(matches!( + zip_io.read_cai(&mut stream), + Err(Error::JumbfNotFound) + )); + + let mut output_stream = Cursor::new(Vec::with_capacity(sample.len() + 7)); + let random_bytes = [1, 2, 3, 4, 3, 2, 1]; + zip_io.write_cai(&mut stream, &mut output_stream, &random_bytes)?; + + let data_written = zip_io.read_cai(&mut output_stream)?; + assert_eq!(data_written, random_bytes); + } + + Ok(()) + } + + #[test] + fn test_write_bytes_replace() -> Result<()> { + for sample in SAMPLES { + let mut stream = Cursor::new(sample); + + let zip_io = ZipIO {}; + + assert!(matches!( + zip_io.read_cai(&mut stream), + Err(Error::JumbfNotFound) + )); + + let mut output_stream1 = Cursor::new(Vec::with_capacity(sample.len() + 7)); + let random_bytes = [1, 2, 3, 4, 3, 2, 1]; + zip_io.write_cai(&mut stream, &mut output_stream1, &random_bytes)?; + + let data_written = zip_io.read_cai(&mut output_stream1)?; + assert_eq!(data_written, random_bytes); + + let mut output_stream2 = Cursor::new(Vec::with_capacity(sample.len() + 5)); + let random_bytes = [3, 2, 1, 2, 3]; + zip_io.write_cai(&mut output_stream1, &mut output_stream2, &random_bytes)?; + + let data_written = zip_io.read_cai(&mut output_stream2)?; + assert_eq!(data_written, random_bytes); + + let mut bytes = Vec::new(); + stream.rewind()?; + stream.read_to_end(&mut bytes)?; + assert_eq!(sample, bytes); + } + + Ok(()) + } +} diff --git a/sdk/src/builder.rs b/sdk/src/builder.rs index eacec53c0..f46f2e6f4 100644 --- a/sdk/src/builder.rs +++ b/sdk/src/builder.rs @@ -24,7 +24,7 @@ use schemars::JsonSchema; use serde::{Deserialize, Serialize}; use serde_with::skip_serializing_none; use uuid::Uuid; -use zip::{write::FileOptions, ZipArchive, ZipWriter}; +use zip::{write::SimpleFileOptions, ZipArchive, ZipWriter}; use crate::{ assertion::AssertionBase, @@ -416,12 +416,13 @@ impl Builder { { let mut zip = ZipWriter::new(stream); let options = - FileOptions::default().compression_method(zip::CompressionMethod::Stored); + SimpleFileOptions::default().compression_method(zip::CompressionMethod::Stored); // write a version file zip.start_file("version.txt", options) .map_err(|e| Error::OtherError(Box::new(e)))?; zip.write_all(ARCHIVE_VERSION.as_bytes())?; // write the manifest.json file + zip.start_file("manifest.json", options) .map_err(|e| 
             .map_err(|e| Error::OtherError(Box::new(e)))?;
         zip.write_all(&serde_json::to_vec(self)?)?;
diff --git a/sdk/src/jumbf_io.rs b/sdk/src/jumbf_io.rs
index 677ec9be7..752ca35be 100644
--- a/sdk/src/jumbf_io.rs
+++ b/sdk/src/jumbf_io.rs
@@ -28,7 +28,7 @@ use crate::asset_handlers::pdf_io::PdfIO;
 use crate::{
     asset_handlers::{
         bmff_io::BmffIO, c2pa_io::C2paIO, gif_io::GifIO, jpeg_io::JpegIO, mp3_io::Mp3IO,
-        png_io::PngIO, riff_io::RiffIO, svg_io::SvgIO, tiff_io::TiffIO,
+        png_io::PngIO, riff_io::RiffIO, svg_io::SvgIO, tiff_io::TiffIO, zip_io::ZipIO,
     },
     asset_io::{AssetIO, CAIRead, CAIReadWrite, CAIReader, CAIWriter, HashObjectPositions},
     error::{Error, Result},
@@ -48,6 +48,7 @@ lazy_static! {
         Box::new(SvgIO::new("")),
         Box::new(TiffIO::new("")),
         Box::new(Mp3IO::new("")),
+        Box::new(ZipIO::new("")),
         Box::new(GifIO::new("")),
     ];
 
@@ -77,6 +78,7 @@ lazy_static! {
         Box::new(SvgIO::new("")),
         Box::new(TiffIO::new("")),
         Box::new(Mp3IO::new("")),
+        Box::new(ZipIO::new("")),
         Box::new(GifIO::new("")),
     ];
     let mut handler_map = HashMap::new();
@@ -366,6 +368,7 @@ pub mod tests {
         Box::new(TiffIO::new("")),
         Box::new(SvgIO::new("")),
         Box::new(Mp3IO::new("")),
+        Box::new(ZipIO::new("")),
     ];
 
     // build handler map
@@ -390,6 +393,7 @@ pub mod tests {
         Box::new(TiffIO::new("")),
         Box::new(SvgIO::new("")),
         Box::new(Mp3IO::new("")),
+        Box::new(ZipIO::new("")),
     ];
 
     // build handler map
@@ -407,6 +411,7 @@ pub mod tests {
         Box::new(JpegIO::new("")),
         Box::new(PngIO::new("")),
         Box::new(Mp3IO::new("")),
+        Box::new(ZipIO::new("")),
         Box::new(SvgIO::new("")),
         Box::new(RiffIO::new("")),
         Box::new(GifIO::new("")),
diff --git a/sdk/src/utils/hash_utils.rs b/sdk/src/utils/hash_utils.rs
index 221e8d0ca..a7450cfac 100644
--- a/sdk/src/utils/hash_utils.rs
+++ b/sdk/src/utils/hash_utils.rs
@@ -206,6 +206,8 @@ pub fn hash_asset_by_alg_with_inclusions(
     to_be_hashed: [IIIIIXXXXXMIIIIIMXXXXXMXXXXIII...III]
 
     The data is again split into range sets breaking at the exclusion points and now also the markers.
+
+    // TODO: describe collection hash
 */
 pub fn hash_stream_by_alg(
     alg: &str,
diff --git a/sdk/tests/fixtures/sample1.docx b/sdk/tests/fixtures/sample1.docx
new file mode 100644
index 000000000..919cb20c0
Binary files /dev/null and b/sdk/tests/fixtures/sample1.docx differ
diff --git a/sdk/tests/fixtures/sample1.odt b/sdk/tests/fixtures/sample1.odt
new file mode 100644
index 000000000..a850fca6b
Binary files /dev/null and b/sdk/tests/fixtures/sample1.odt differ
diff --git a/sdk/tests/fixtures/sample1.zip b/sdk/tests/fixtures/sample1.zip
new file mode 100644
index 000000000..18c5dd36e
Binary files /dev/null and b/sdk/tests/fixtures/sample1.zip differ
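For context on the embedding strategy in `zip_io.rs`: `write_cai` copies the original archive to the output and then appends the manifest with `ZipWriter::new_append`, storing it uncompressed so its bytes stay contiguous and hashable. A minimal sketch of that pattern using the `zip` 2.x API directly is below; the helper name and in-memory buffer are assumptions, and it omits the duplicate-entry handling that `write_cai` performs.

```rust
use std::io::{Cursor, Write};

use zip::{write::SimpleFileOptions, CompressionMethod, ZipWriter};

// Append a manifest entry to an existing archive held in `buf`.
fn embed_manifest(buf: Vec<u8>, manifest: &[u8]) -> zip::result::ZipResult<Vec<u8>> {
    let mut writer = ZipWriter::new_append(Cursor::new(buf))?;
    writer.add_directory("META-INF", SimpleFileOptions::default())?;
    // Stored (no compression) keeps the manifest bytes contiguous in the file.
    writer.start_file(
        "META-INF/content_credential.c2pa",
        SimpleFileOptions::default().compression_method(CompressionMethod::Stored),
    )?;
    writer.write_all(manifest)?;
    Ok(writer.finish()?.into_inner())
}
```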