feat: Add ZIP support (+ EPUB, Office Open XML, Open Document, and OpenXPS) and Collection Data Hash assertion #499

Status: Open. ok-nick wants to merge 24 commits into main from ok-nick/zip.

Commits (24):
2c22931  Foundation for ZIP support (ok-nick, Jul 8, 2024)
a84411a  Foundation for collection data hash assertion (ok-nick, Jul 8, 2024)
c03d653  Collection assertion hash resolver (ok-nick, Jul 9, 2024)
9e4af21  Rework collection hash assertion (ok-nick, Jul 9, 2024)
6559e6f  More collection assertion validation (ok-nick, Jul 9, 2024)
ac1a12d  Fix ZIP unit tests (ok-nick, Jul 9, 2024)
d1f3eab  Update memchr dep (ok-nick, Jul 9, 2024)
9b5be12  Update memchr dep in make_test_images (ok-nick, Jul 9, 2024)
f57b832  Add ZIP unit tests (ok-nick, Jul 9, 2024)
5cf73e0  Merge branch 'main' into ok-nick/zip (ok-nick, Jul 9, 2024)
7700453  Collection Hash Assertion relative path validation (ok-nick, Jul 10, 2024)
7994817  Add collection hash unit tests (ok-nick, Jul 10, 2024)
484cca8  Pass CI for collection hash (ok-nick, Jul 10, 2024)
32fc674  Fix ZIP offsets/lens (ok-nick, Jul 10, 2024)
d668888  Collection assertion docs, optimizations, and cleanup (ok-nick, Jul 11, 2024)
11bef80  Cleanup collection hash errors (ok-nick, Jul 12, 2024)
d383ce8  Rework collection hash and add better validation (ok-nick, Jul 12, 2024)
744045d  More file types for ZIP unit tests (ok-nick, Jul 12, 2024)
d0704e9  Merge remote-tracking branch 'origin' into ok-nick/zip (ok-nick, Aug 8, 2024)
db83807  Hash central directory and add unit tests (ok-nick, Aug 8, 2024)
c2feb82  Fix thiserror dependency conflict (ok-nick, Aug 8, 2024)
97ebd56  Use latest zip crate (with fix) (ok-nick, Aug 29, 2024)
e29fe8b  Merge remote-tracking branch 'origin' into ok-nick/zip (ok-nick, Aug 29, 2024)
a5d0533  Update log crate to fix dependency conflict (ok-nick, Aug 29, 2024)

Files changed (showing changes from 2 of the 24 commits):
4 changes: 2 additions & 2 deletions sdk/Cargo.toml
@@ -121,11 +121,11 @@
sha2 = "0.10.2"
tempfile = "3.10.1"
thiserror = "1.0.61"
treeline = "0.1.0"
url = "2.2.2, <2.5.1" # Can't use 2.5.1 or newer until new license is reviewed.
url = "2.2.2, <2.5.1" # Can't use 2.5.1 or newer until new license is reviewed.
uuid = { version = "1.3.1", features = ["serde", "v4", "wasm-bindgen"] }
x509-parser = "0.15.1"
x509-certificate = "0.19.0"
zip = { version = "0.6.6", default-features = false }
zip = { version = "2.1.3", default-features = false }

[target.'cfg(not(target_arch = "wasm32"))'.dependencies]
ureq = "2.4.0"
79 changes: 79 additions & 0 deletions sdk/src/assertions/collection_hash.rs
@@ -0,0 +1,79 @@
use std::io::{Read, Seek};

use serde::{Deserialize, Serialize};

use crate::{assertions::AssetType, asset_handlers::zip_io, hash_stream_by_alg, Error, Result};

/// Collection data hash assertion: one hash per referenced URI, plus a hash
/// over the ZIP central directory.
#[derive(Serialize, Deserialize, Debug, PartialEq, Eq)]
pub struct CollectionHash {
    pub uri_maps: Vec<UriHashedDataMap>,

    #[serde(skip_serializing_if = "Option::is_none")]
    pub alg: Option<String>,

    #[serde(skip_serializing_if = "Option::is_none", with = "serde_bytes")]
    pub zip_central_directory_hash: Option<Vec<u8>>,
}

/// Hash and metadata for a single file (URI) within the collection.
#[derive(Serialize, Deserialize, Debug, PartialEq, Eq)]
pub struct UriHashedDataMap {
    pub uri: String,

    #[serde(with = "serde_bytes")]
    pub hash: Vec<u8>,

    #[serde(skip_serializing_if = "Option::is_none")]
    pub size: Option<u64>,

    #[serde(rename = "dc:format", skip_serializing_if = "Option::is_none")]
    pub dc_format: Option<String>,

    #[serde(skip_serializing_if = "Option::is_none")]
    pub data_types: Option<Vec<AssetType>>,
}

impl CollectionHash {
    pub fn new(alg: String) -> Self {
        CollectionHash {
            uri_maps: Vec::new(),
            alg: Some(alg),
            zip_central_directory_hash: None,
        }
    }

    fn add_uri_map(&mut self, uri_map: UriHashedDataMap) {
        self.uri_maps.push(uri_map);
    }

    // TODO: support custom collection hashes
    pub fn gen_hash_from_stream<R>(&mut self, stream: &mut R) -> Result<()>
    where
        R: Read + Seek + ?Sized,
    {
        let alg = match self.alg {
            Some(ref a) => a.clone(),
            None => "sha256".to_string(),
        };

        // Hash the ZIP central directory first.
        let zip_central_directory_inclusions = zip_io::central_directory_inclusions(stream)?;
        let zip_central_directory_hash =
            hash_stream_by_alg(&alg, stream, Some(zip_central_directory_inclusions), false)?;
        if zip_central_directory_hash.is_empty() {
            return Err(Error::BadParam("could not generate data hash".to_string()));
        }
        self.zip_central_directory_hash = Some(zip_central_directory_hash);

        // Then hash the byte range of each embedded file referenced by the URI maps.
        let uri_inclusions = zip_io::uri_inclusions(stream, &self.uri_maps)?;
        for (i, uri_map) in self.uri_maps.iter_mut().enumerate() {
            let hash =
                hash_stream_by_alg(&alg, stream, Some(vec![uri_inclusions[i].clone()]), false)?;
            if hash.is_empty() {
                return Err(Error::BadParam("could not generate data hash".to_string()));
            }

            uri_map.hash = hash;
        }

        Ok(())
    }
}
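
For review purposes, a minimal usage sketch (not part of the diff; the fixture path is hypothetical, and central_directory_inclusions still ends in todo!() at this point in the series, so this only runs once that range computation lands):

use std::fs::File;

use c2pa::assertions::{CollectionHash, UriHashedDataMap};

fn hash_zip_collection() -> c2pa::Result<()> {
    let mut stream = File::open("sample.zip")?; // hypothetical fixture

    let mut collection = CollectionHash::new("sha256".to_owned());
    // List the embedded files the assertion should cover; the hash fields
    // are filled in by gen_hash_from_stream.
    collection.uri_maps.push(UriHashedDataMap {
        uri: "mimetype".to_owned(),
        hash: Vec::new(),
        size: None,
        dc_format: None,
        data_types: None,
    });

    // Hashes the central directory, then each listed URI range.
    collection.gen_hash_from_stream(&mut stream)?;
    Ok(())
}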
3 changes: 3 additions & 0 deletions sdk/src/assertions/mod.rs
@@ -25,6 +25,9 @@
pub use box_hash::{BoxHash, BoxMap, C2PA_BOXHASH};
mod data_hash;
pub use data_hash::DataHash;

+ mod collection_hash;
+ pub use collection_hash::{CollectionHash, UriHashedDataMap};

mod creative_work;
pub use creative_work::CreativeWork;

1 change: 1 addition & 0 deletions sdk/src/asset_handlers/mod.rs
@@ -19,6 +19,7 @@
pub mod png_io;
pub mod riff_io;
pub mod svg_io;
pub mod tiff_io;
+ pub mod zip_io;

#[cfg(feature = "pdf")]
pub(crate) mod pdf;
270 changes: 270 additions & 0 deletions sdk/src/asset_handlers/zip_io.rs
@@ -0,0 +1,270 @@
use std::{
    fs::{self, File},
    io::{self, Read, Seek},
    path::Path,
};

use tempfile::Builder;
use zip::{result::ZipResult, write::SimpleFileOptions, CompressionMethod, ZipArchive, ZipWriter};

use crate::{
    assertions::UriHashedDataMap,
    asset_io::{
        self, AssetIO, CAIReadWrapper, CAIReadWriteWrapper, CAIReader, CAIWriter,
        HashObjectPositions,
    },
    error::Result,
    CAIRead, CAIReadWrite, Error, HashRange,
};

pub struct ZipIO {}

impl CAIWriter for ZipIO {
    fn write_cai(
        &self,
        input_stream: &mut dyn CAIRead,
        output_stream: &mut dyn CAIReadWrite,
        mut store_bytes: &[u8],
    ) -> Result<()> {
        // Copy the input archive into the output, then append the manifest.
        let mut writer = self
            .writer(input_stream, output_stream)
            .map_err(|_| Error::EmbeddingError)?;

        // TODO: what happens if the dir exists?
        writer
            .add_directory("META-INF", SimpleFileOptions::default())
            .map_err(|_| Error::EmbeddingError)?;

        // Store the manifest uncompressed so its bytes sit verbatim in the archive.
        writer
            .start_file_from_path(
                Path::new("META-INF/content_credential.c2pa"),
                SimpleFileOptions::default().compression_method(CompressionMethod::Stored),
            )
            .map_err(|_| Error::EmbeddingError)?;
        io::copy(&mut store_bytes, &mut writer)?;
        writer.finish().map_err(|_| Error::EmbeddingError)?;

        Ok(())
    }

    fn get_object_locations_from_stream(
        &self,
        _input_stream: &mut dyn CAIRead,
    ) -> Result<Vec<HashObjectPositions>> {
        // TODO: error?
        Ok(Vec::new())
    }

    fn remove_cai_store_from_stream(
        &self,
        input_stream: &mut dyn CAIRead,
        output_stream: &mut dyn CAIReadWrite,
    ) -> Result<()> {
        let mut writer = self
            .writer(input_stream, output_stream)
            .map_err(|_| Error::EmbeddingError)?;

        // Re-start the manifest entry and abort it to drop it from the archive.
        writer
            .start_file_from_path(
                Path::new("META-INF/content_credential.c2pa"),
                SimpleFileOptions::default(),
            )
            .map_err(|_| Error::EmbeddingError)?;
        writer.abort_file().map_err(|_| Error::EmbeddingError)?;
        writer.finish().map_err(|_| Error::EmbeddingError)?;

        Ok(())
    }
}

impl CAIReader for ZipIO {
    fn read_cai(&self, asset_reader: &mut dyn CAIRead) -> Result<Vec<u8>> {
        let mut reader = self
            .reader(asset_reader)
            .map_err(|_| Error::JumbfNotFound)?;

        let index = reader
            .index_for_path(Path::new("META-INF/content_credential.c2pa"))
            .ok_or(Error::JumbfNotFound)?;
        let mut file = reader.by_index(index).map_err(|_| Error::JumbfNotFound)?;

        let mut bytes = Vec::new();
        file.read_to_end(&mut bytes)?;

        Ok(bytes)
    }

    fn read_xmp(&self, _asset_reader: &mut dyn CAIRead) -> Option<String> {
        None
    }
}

impl AssetIO for ZipIO {
    fn new(_asset_type: &str) -> Self
    where
        Self: Sized,
    {
        ZipIO {}
    }

    fn get_handler(&self, asset_type: &str) -> Box<dyn AssetIO> {
        Box::new(ZipIO::new(asset_type))
    }

    fn get_reader(&self) -> &dyn CAIReader {
        self
    }

    fn get_writer(&self, asset_type: &str) -> Option<Box<dyn CAIWriter>> {
        Some(Box::new(ZipIO::new(asset_type)))
    }

    fn read_cai_store(&self, asset_path: &Path) -> Result<Vec<u8>> {
        let mut f = File::open(asset_path)?;
        self.read_cai(&mut f)
    }

    fn save_cai_store(&self, asset_path: &Path, store_bytes: &[u8]) -> Result<()> {
        let mut stream = fs::OpenOptions::new()
            .read(true)
            .open(asset_path)
            .map_err(Error::IoError)?;

        // Write into a temp file, then swap it into place.
        let mut temp_file = Builder::new()
            .prefix("c2pa_temp")
            .rand_bytes(5)
            .tempfile()?;

        self.write_cai(&mut stream, &mut temp_file, store_bytes)?;

        asset_io::rename_or_move(temp_file, asset_path)
    }

    fn get_object_locations(&self, asset_path: &Path) -> Result<Vec<HashObjectPositions>> {
        let mut f = std::fs::File::open(asset_path).map_err(|_err| Error::EmbeddingError)?;
        self.get_object_locations_from_stream(&mut f)
    }

    fn remove_cai_store(&self, asset_path: &Path) -> Result<()> {
        let mut stream = fs::OpenOptions::new()
            .read(true)
            .open(asset_path)
            .map_err(Error::IoError)?;

        let mut temp_file = Builder::new()
            .prefix("c2pa_temp")
            .rand_bytes(5)
            .tempfile()?;

        self.remove_cai_store_from_stream(&mut stream, &mut temp_file)?;

        asset_io::rename_or_move(temp_file, asset_path)
    }

    fn supported_types(&self) -> &[&str] {
        &[
            // Zip
            "zip",
            "application/x-zip",
            // EPUB
            "epub",
            "application/epub+zip",
            // Office Open XML
            "docx",
            "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
            "xlsx",
            "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
            "pptx",
            "application/vnd.openxmlformats-officedocument.presentationml.presentation",
            "docm",
            "application/vnd.ms-word.document.macroEnabled.12",
            "xlsm",
            "application/vnd.ms-excel.sheet.macroEnabled.12",
            "pptm",
            "application/vnd.ms-powerpoint.presentation.macroEnabled.12",
            // Open Document
            "odt",
            "application/vnd.oasis.opendocument.text",
            "ods",
            "application/vnd.oasis.opendocument.spreadsheet",
            "odp",
            "application/vnd.oasis.opendocument.presentation",
            "odg",
            "application/vnd.oasis.opendocument.graphics",
            "ott",
            "application/vnd.oasis.opendocument.text-template",
            "ots",
            "application/vnd.oasis.opendocument.spreadsheet-template",
            "otp",
            "application/vnd.oasis.opendocument.presentation-template",
            "otg",
            "application/vnd.oasis.opendocument.graphics-template",
            // OpenXPS
            "oxps",
            "application/oxps",
        ]
    }
}

impl ZipIO {
    fn writer<'a>(
        &self,
        input_stream: &'a mut dyn CAIRead,
        output_stream: &'a mut dyn CAIReadWrite,
    ) -> ZipResult<ZipWriter<CAIReadWriteWrapper<'a>>> {
        let mut writer = ZipWriter::new_append(CAIReadWriteWrapper {
            reader_writer: output_stream,
        })?;

        // Merge every entry from the input archive into the writer.
        writer.merge_archive(ZipArchive::new(CAIReadWrapper {
            reader: input_stream,
        })?)?;

        Ok(writer)
    }

    fn reader<'a>(
        &self,
        input_stream: &'a mut dyn CAIRead,
    ) -> ZipResult<ZipArchive<CAIReadWrapper<'a>>> {
        ZipArchive::new(CAIReadWrapper {
            reader: input_stream,
        })
    }
}

pub fn central_directory_inclusions<R>(reader: &mut R) -> Result<Vec<HashRange>>
where
    R: Read + Seek + ?Sized,
{
    let _reader = ZipArchive::new(reader).map_err(|_| Error::JumbfNotFound)?;

    // TODO: https://github.com/zip-rs/zip2/pull/71
    // or
    // https://gitlab.com/xMAC94x/zip-core (https://github.com/zip-rs/zip2/issues/204)

    todo!()
}

pub fn uri_inclusions<R>(reader: &mut R, uri_maps: &[UriHashedDataMap]) -> Result<Vec<HashRange>>
where
    R: Read + Seek + ?Sized,
{
    let mut reader = ZipArchive::new(reader).map_err(|_| Error::JumbfNotFound)?;

    let mut ranges = Vec::new();
    for uri_map in uri_maps {
        let index = reader
            .index_for_path(Path::new(&uri_map.uri))
            .ok_or(Error::JumbfNotFound)?;
        let file = reader.by_index(index).map_err(|_| Error::JumbfNotFound)?;
        // TODO: hash from header or data? does compressed_size include header?
        // and fix error type
        ranges.push(HashRange::new(
            usize::try_from(file.header_start()).map_err(|_| Error::JumbfNotFound)?,
            usize::try_from(file.compressed_size()).map_err(|_| Error::JumbfNotFound)?,
        ));
    }

    Ok(ranges)
}
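
To sanity-check the embed/read round trip, a unit-test-style sketch (not part of the diff; uses crate-internal paths, a hypothetical fixture, and assumes Cursor satisfies CAIRead/CAIReadWrite via the SDK's blanket impls; like save_cai_store above, it writes into a fresh output stream):

use std::io::Cursor;

use crate::{
    asset_handlers::zip_io::ZipIO,
    asset_io::{AssetIO, CAIReader, CAIWriter},
};

#[test]
fn zip_store_roundtrip() -> crate::Result<()> {
    let handler = ZipIO::new("zip");
    // Hypothetical fixture; any valid ZIP-based asset works.
    let mut input = Cursor::new(std::fs::read("sample.zip")?);
    let mut output = Cursor::new(Vec::new());

    // write_cai appends META-INF/content_credential.c2pa, stored uncompressed.
    let manifest = b"c2pa-manifest-bytes".to_vec();
    handler.write_cai(&mut input, &mut output, &manifest)?;

    // read_cai locates the entry by path and returns its bytes unchanged.
    assert_eq!(handler.read_cai(&mut output)?, manifest);
    Ok(())
}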