diff --git a/Cargo.toml b/Cargo.toml index 1d2a07a07..a859023fc 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,3 +1,10 @@ [workspace] resolver = "2" -members = ["sdk", "export_schema", "make_test_images"] +members = [ + "sdk", + "export_schema", + "make_test_images", + "sdk/crates/*", + # TODO: add wildcard + "sdk/crates/c2pa-codecs/fuzz", +] diff --git a/sdk/crates/c2pa-codecs/Cargo.toml b/sdk/crates/c2pa-codecs/Cargo.toml new file mode 100644 index 000000000..b2463ad8c --- /dev/null +++ b/sdk/crates/c2pa-codecs/Cargo.toml @@ -0,0 +1,59 @@ +[package] +name = "c2pa-codecs" +version = "0.1.0" +edition = "2021" + +[dependencies] +thiserror = "1.0.61" +# TODO: need? +memchr = "2.7.1" +# TODO: unneeded and super unmaintained +conv = "0.3.3" +# TODO: remove this +tempfile = "3.10.1" +byteorder = { version = "1.4.3", default-features = false } +# TODO: do we need +serde_bytes = { version = "0.11.5", optional = true } +# TODO: temp +serde = { version = "1.0.197", features = ["derive"] } +# TODO: unmaintained +atree = "0.5.2" +# TODO: this crate is deprecated, use quick-xml +fast-xml = { version = "0.23.1", optional = true } +quick-xml = { version = "0.36.1", optional = true } +lopdf = { version = "0.31.0", optional = true } +# Version 1.13.0 doesn't compile under Rust < 1.75, pinning to 1.12.0 +id3 = { version = "=1.12.0", optional = true } +png_pong = { version = "0.9.1", optional = true } +# TODO: sort of unmaintained +img-parts = { version = "0.3.0", optional = true } +# TODO: sort of unmaintained +riff = { version = "1.0.1", optional = true } +# TODO: needed? 
+base64 = { version = "0.21.2", optional = true } +# TODO: look into +jfifdump = { version = "0.5.1", optional = true } + +[features] +default = [ + "bmff", + "gif", + "jpeg", + "mp3", + "pdf", + "png", + "riff", + "svg", + "tiff", + "xmp", +] +xmp = ["fast-xml"] +bmff = [] +gif = [] +jpeg = ["img-parts", "jfifdump"] +mp3 = ["id3"] +pdf = ["lopdf"] +png = ["png_pong", "img-parts"] # TODO: remove img-parts feature here +riff = ["dep:riff"] +svg = ["fast-xml", "base64"] +tiff = [] diff --git a/sdk/crates/c2pa-codecs/fuzz/Cargo.toml b/sdk/crates/c2pa-codecs/fuzz/Cargo.toml new file mode 100644 index 000000000..9f0845752 --- /dev/null +++ b/sdk/crates/c2pa-codecs/fuzz/Cargo.toml @@ -0,0 +1,8 @@ +[package] +name = "c2pa-codecs-fuzz" +version = "0.1.0" +edition = "2021" + +[dependencies] +afl = "0.15.10" +c2pa-codecs = { path = ".." } diff --git a/sdk/crates/c2pa-codecs/fuzz/src/main.rs b/sdk/crates/c2pa-codecs/fuzz/src/main.rs new file mode 100644 index 000000000..577d42fa2 --- /dev/null +++ b/sdk/crates/c2pa-codecs/fuzz/src/main.rs @@ -0,0 +1,25 @@ +use std::io::Cursor; + +use c2pa_codecs::{ + codecs::{gif::GifCodec, svg::SvgCodec}, + Decode, +}; + +// TODO: add all codecs and add way to choose what to fuzz, reading/writing/c2pa/xmp/etc. +fn main() { + afl::fuzz!(|data: &[u8]| { + let src = Cursor::new(data); + + // let mut c = GifCodec::new(src); + // let _ = c.read_c2pa(); + // let _ = c.read_xmp(); + + let mut c = SvgCodec::new(src); + let _ = c.read_c2pa(); + let _ = c.read_xmp(); + + // let mut c = C2paCodec::new(src); + // let _ = c.read_c2pa(); + // let _ = c.read_xmp(); + }); +} diff --git a/sdk/crates/c2pa-codecs/src/codecs/bmff_io.rs b/sdk/crates/c2pa-codecs/src/codecs/bmff_io.rs new file mode 100644 index 000000000..d18001f70 --- /dev/null +++ b/sdk/crates/c2pa-codecs/src/codecs/bmff_io.rs @@ -0,0 +1,2003 @@ +// Copyright 2022 Adobe. All rights reserved. 
+// This file is licensed to you under the Apache License, +// Version 2.0 (http://www.apache.org/licenses/LICENSE-2.0) +// or the MIT license (http://opensource.org/licenses/MIT), +// at your option. + +// Unless required by applicable law or agreed to in writing, +// this software is distributed on an "AS IS" BASIS, WITHOUT +// WARRANTIES OR REPRESENTATIONS OF ANY KIND, either express or +// implied. See the LICENSE-MIT and LICENSE-APACHE files for the +// specific language governing permissions and limitations under +// each license. + +use std::{ + cmp::min, + collections::HashMap, + fs::{File, OpenOptions}, + io::{Cursor, Read, Seek, SeekFrom, Write}, + path::Path, +}; + +use atree::{Arena, Token}; +use byteorder::{BigEndian, ReadBytesExt, WriteBytesExt}; +use conv::ValueFrom; +use tempfile::Builder; + +use crate::{ + assertions::{BmffMerkleMap, ExclusionsMap}, + asset_io::{ + rename_or_move, AssetIO, AssetPatch, CAIRead, CAIReadWrite, CAIReader, CAIWriter, + HashObjectPositions, RemoteRefEmbed, RemoteRefEmbedType, + }, + error::{Error, Result}, + utils::{ + hash_utils::{vec_compare, HashRange}, + xmp_inmemory_utils::{add_provenance, MIN_XMP}, + }, +}; + +pub struct BmffIO { + #[allow(dead_code)] + bmff_format: String, // can be used for specialized BMFF cases +} + +const HEADER_SIZE: u64 = 8; // 4 byte type + 4 byte size +const HEADER_SIZE_LARGE: u64 = 16; // 4 byte type + 4 byte size + 8 byte large size + +const C2PA_UUID: [u8; 16] = [ + 0xd8, 0xfe, 0xc3, 0xd6, 0x1b, 0x0e, 0x48, 0x3c, 0x92, 0x97, 0x58, 0x28, 0x87, 0x7e, 0xc4, 0x81, +]; +const XMP_UUID: [u8; 16] = [ + 0xbe, 0x7a, 0xcf, 0xcb, 0x97, 0xa9, 0x42, 0xe8, 0x9c, 0x71, 0x99, 0x94, 0x91, 0xe3, 0xaf, 0xac, +]; +const MANIFEST: &str = "manifest"; +const MERKLE: &str = "merkle"; + +// ISO IEC 14496-12_2022 FullBoxes +const FULL_BOX_TYPES: &[&str; 80] = &[ + "pdin", "mvhd", "tkhd", "mdhd", "hdlr", "nmhd", "elng", "stsd", "stdp", "stts", "ctts", "cslg", + "stss", "stsh", "stdp", "elst", "dref", "stsz", 
"stz2", "stsc", "stco", "co64", "padb", "subs", + "saiz", "saio", "mehd", "trex", "mfhd", "tfhd", "trun", "tfra", "mfro", "tfdt", "leva", "trep", + "assp", "sbgp", "sgpd", "csgp", "cprt", "tsel", "kind", "meta", "xml ", "bxml", "iloc", "pitm", + "ipro", "infe", "iinf", "iref", "ipma", "schm", "fiin", "fpar", "fecr", "gitn", "fire", "stri", + "stsg", "stvi", "csch", "sidx", "ssix", "prft", "srpp", "vmhd", "smhd", "srat", "chnl", "dmix", + "txtC", "mime", "uri ", "uriI", "hmhd", "sthd", "vvhd", "medc", +]; + +static SUPPORTED_TYPES: [&str; 13] = [ + "avif", + "heif", + "heic", + "mp4", + "m4a", + "mov", + "application/mp4", + "audio/mp4", + "image/avif", + "image/heic", + "image/heif", + "video/mp4", + "video/quicktime", +]; + +macro_rules! boxtype { + ($( $name:ident => $value:expr ),*) => { + #[derive(Clone, Copy, Debug, PartialEq, Eq)] + pub enum BoxType { + $( $name, )* + UnknownBox(u32), + } + + impl From for BoxType { + fn from(t: u32) -> BoxType { + match t { + $( $value => BoxType::$name, )* + _ => BoxType::UnknownBox(t), + } + } + } + + impl From for u32 { + fn from(t: BoxType) -> u32 { + match t { + $( BoxType::$name => $value, )* + BoxType::UnknownBox(t) => t, + } + } + } + } +} + +boxtype! 
{ + Empty => 0x0000_0000, + UuidBox => 0x75756964, + FtypBox => 0x66747970, + MvhdBox => 0x6d766864, + MfhdBox => 0x6d666864, + FreeBox => 0x66726565, + MdatBox => 0x6d646174, + MoovBox => 0x6d6f6f76, + MvexBox => 0x6d766578, + MehdBox => 0x6d656864, + TrexBox => 0x74726578, + EmsgBox => 0x656d7367, + MoofBox => 0x6d6f6f66, + TkhdBox => 0x746b6864, + TfhdBox => 0x74666864, + EdtsBox => 0x65647473, + MdiaBox => 0x6d646961, + ElstBox => 0x656c7374, + MfraBox => 0x6d667261, + MdhdBox => 0x6d646864, + HdlrBox => 0x68646c72, + MinfBox => 0x6d696e66, + VmhdBox => 0x766d6864, + StblBox => 0x7374626c, + StsdBox => 0x73747364, + SttsBox => 0x73747473, + CttsBox => 0x63747473, + StssBox => 0x73747373, + StscBox => 0x73747363, + StszBox => 0x7374737A, + StcoBox => 0x7374636F, + Co64Box => 0x636F3634, + TrakBox => 0x7472616b, + TrafBox => 0x74726166, + TrefBox => 0x74726566, + TregBox => 0x74726567, + TrunBox => 0x7472756E, + UdtaBox => 0x75647461, + DinfBox => 0x64696e66, + DrefBox => 0x64726566, + UrlBox => 0x75726C20, + SmhdBox => 0x736d6864, + Avc1Box => 0x61766331, + AvcCBox => 0x61766343, + Hev1Box => 0x68657631, + HvcCBox => 0x68766343, + Mp4aBox => 0x6d703461, + EsdsBox => 0x65736473, + Tx3gBox => 0x74783367, + VpccBox => 0x76706343, + Vp09Box => 0x76703039, + MetaBox => 0x6D657461, + SchiBox => 0x73636869, + IlocBox => 0x696C6F63 +} + +struct BoxHeaderLite { + pub name: BoxType, + pub size: u64, + pub fourcc: String, + pub large_size: bool, +} + +impl BoxHeaderLite { + pub fn new(name: BoxType, size: u64, fourcc: &str) -> Self { + Self { + name, + size, + fourcc: fourcc.to_string(), + large_size: false, + } + } + + pub fn read(reader: &mut R) -> Result { + // Create and read to buf. + let mut buf = [0u8; 8]; // 8 bytes for box header. + reader.read_exact(&mut buf)?; + + // Get size. + let mut s = [0u8; 4]; + s.clone_from_slice(&buf[0..4]); + let size = u32::from_be_bytes(s); + + // Get box type string. 
+ let mut t = [0u8; 4]; + t.clone_from_slice(&buf[4..8]); + let fourcc = String::from_utf8_lossy(&buf[4..8]).to_string(); + let typ = u32::from_be_bytes(t); + + // Get largesize if size is 1 + if size == 1 { + reader.read_exact(&mut buf)?; + let largesize = u64::from_be_bytes(buf); + + Ok(BoxHeaderLite { + name: BoxType::from(typ), + size: largesize, + fourcc, + large_size: true, + }) + } else { + Ok(BoxHeaderLite { + name: BoxType::from(typ), + size: size as u64, + fourcc, + large_size: false, + }) + } + } + + pub fn write(&self, writer: &mut W) -> Result { + if self.size > u32::MAX as u64 { + writer.write_u32::(1)?; + writer.write_u32::(self.name.into())?; + writer.write_u64::(self.size)?; + Ok(16) + } else { + writer.write_u32::(self.size as u32)?; + writer.write_u32::(self.name.into())?; + Ok(8) + } + } +} + +fn write_box_uuid_extension(w: &mut W, uuid: &[u8; 16]) -> Result { + w.write_all(uuid)?; + Ok(16) +} + +#[derive(Clone, Debug, PartialEq)] +pub(crate) struct BoxInfo { + path: String, + parent: Option, + pub offset: u64, + pub size: u64, + box_type: BoxType, + user_type: Option>, + version: Option, + flags: Option, +} + +#[derive(Clone, Debug, PartialEq)] +pub(crate) struct BoxInfoLite { + pub path: String, + pub offset: u64, + pub size: u64, +} + +fn read_box_header_ext(reader: &mut R) -> Result<(u8, u32)> { + let version = reader.read_u8()?; + let flags = reader.read_u24::()?; + Ok((version, flags)) +} +fn write_box_header_ext(w: &mut W, v: u8, f: u32) -> Result { + w.write_u8(v)?; + w.write_u24::(f)?; + Ok(4) +} + +fn box_start(reader: &mut R, is_large: bool) -> Result { + if is_large { + Ok(reader.stream_position()? - HEADER_SIZE_LARGE) + } else { + Ok(reader.stream_position()? 
- HEADER_SIZE) + } +} + +fn _skip_bytes(reader: &mut R, size: u64) -> Result<()> { + reader.seek(SeekFrom::Current(size as i64))?; + Ok(()) +} + +fn skip_bytes_to(reader: &mut R, pos: u64) -> Result { + let pos = reader.seek(SeekFrom::Start(pos))?; + Ok(pos) +} + +fn write_c2pa_box( + w: &mut W, + data: &[u8], + is_manifest: bool, + merkle_data: &[u8], +) -> Result<()> { + let purpose_size = if is_manifest { + MANIFEST.len() + 1 + } else { + MERKLE.len() + 1 + }; + let merkle_size = if is_manifest { 8 } else { merkle_data.len() }; + let size = 8 + 16 + 4 + purpose_size + merkle_size + data.len(); // header + UUID + version/flags + data + zero terminated purpose + merkle data + let bh = BoxHeaderLite::new(BoxType::UuidBox, size as u64, "uuid"); + + // write out header + bh.write(w)?; + + // write out c2pa extension UUID + write_box_uuid_extension(w, &C2PA_UUID)?; + + // write out version and flags + let version: u8 = 0; + let flags: u32 = 0; + write_box_header_ext(w, version, flags)?; + + // write purpose + if is_manifest { + w.write_all(MANIFEST.as_bytes())?; + w.write_u8(0)?; + + // write no merkle flag + w.write_u64::(0)?; + } else { + w.write_all(MERKLE.as_bytes())?; + w.write_u8(0)?; + + // write merkle cbor + w.write_all(merkle_data)?; + } + + // write out data + w.write_all(data)?; + + Ok(()) +} + +fn write_xmp_box(w: &mut W, data: &[u8]) -> Result<()> { + let size = 8 + 16 + 4 + data.len(); // header + UUID + data + let bh = BoxHeaderLite::new(BoxType::UuidBox, size as u64, "uuid"); + + // write out header + bh.write(w)?; + + // write out XMP extension UUID + write_box_uuid_extension(w, &XMP_UUID)?; + + // write out data + w.write_all(data)?; + + Ok(()) +} + +fn _write_free_box(w: &mut W, size: usize) -> Result<()> { + if size < 8 { + return Err(Error::BadParam("cannot adjust free space".to_string())); + } + + let zeros = vec![0u8; size - 8]; + let bh = BoxHeaderLite::new(BoxType::FreeBox, size as u64, "free"); + + // write out header + bh.write(w)?; + + // 
write out header + w.write_all(&zeros)?; + + Ok(()) +} + +fn add_token_to_cache(bmff_path_map: &mut HashMap>, path: String, token: Token) { + if let Some(token_list) = bmff_path_map.get_mut(&path) { + token_list.push(token); + } else { + let token_list = vec![token]; + bmff_path_map.insert(path, token_list); + } +} + +fn path_from_token(bmff_tree: &Arena, current_node_token: &Token) -> Result { + let ancestors = current_node_token.ancestors(bmff_tree); + let mut path = bmff_tree[*current_node_token].data.path.clone(); + + for parent in ancestors { + path = format!("{}/{}", parent.data.path, path); + } + + if path.is_empty() { + path = "/".to_string(); + } + + Ok(path) +} + +fn get_top_level_box_offsets( + bmff_tree: &Arena, + bmff_path_map: &HashMap>, +) -> Vec { + let mut tl_offsets = Vec::new(); + + for (p, t) in bmff_path_map { + // look for top level offsets + if p.matches('/').count() == 1 { + for token in t { + if let Some(box_info) = bmff_tree.get(*token) { + tl_offsets.push(box_info.data.offset); + } + } + } + } + + tl_offsets +} + +fn get_top_level_boxes( + bmff_tree: &Arena, + bmff_path_map: &HashMap>, +) -> Vec { + let mut tl_boxes = Vec::new(); + + for (p, t) in bmff_path_map { + // look for top level offsets + if p.matches('/').count() == 1 { + for token in t { + if let Some(box_info) = bmff_tree.get(*token) { + tl_boxes.push(BoxInfoLite { + path: box_info.data.path.clone(), + offset: box_info.data.offset, + size: box_info.data.size, + }); + } + } + } + } + + tl_boxes +} + +pub fn bmff_to_jumbf_exclusions( + reader: &mut R, + bmff_exclusions: &[ExclusionsMap], + bmff_v2: bool, +) -> Result> +where + R: Read + Seek + ?Sized, +{ + let size = reader.seek(SeekFrom::End(0))?; + reader.rewind()?; + + // create root node + let root_box = BoxInfo { + path: "".to_string(), + offset: 0, + size, + box_type: BoxType::Empty, + parent: None, + user_type: None, + version: None, + flags: None, + }; + + let (mut bmff_tree, root_token) = Arena::with_data(root_box); + 
let mut bmff_map: HashMap> = HashMap::new(); + + // build layout of the BMFF structure + build_bmff_tree(reader, size, &mut bmff_tree, &root_token, &mut bmff_map)?; + + // get top level box offsets + let mut tl_offsets = get_top_level_box_offsets(&bmff_tree, &bmff_map); + tl_offsets.sort(); + + let mut exclusions = Vec::new(); + + for bmff_exclusion in bmff_exclusions { + if let Some(box_token_list) = bmff_map.get(&bmff_exclusion.xpath) { + for box_token in box_token_list { + let box_info = &bmff_tree[*box_token].data; + + let box_start = box_info.offset; + let box_length = box_info.size; + + let exclusion_start = box_start; + let exclusion_length = box_length; + + // adjust exclusion bounds as needed + + // check the length + if let Some(desired_length) = bmff_exclusion.length { + if desired_length as u64 != box_length { + continue; + } + } + + // check the version + if let Some(desired_version) = bmff_exclusion.version { + if let Some(box_version) = box_info.version { + if desired_version != box_version { + continue; + } + } + } + + // check the flags + if let Some(desired_flag_bytes) = &bmff_exclusion.flags { + let mut temp_bytes = [0u8; 4]; + if desired_flag_bytes.len() >= 3 { + temp_bytes[0] = desired_flag_bytes[0]; + temp_bytes[1] = desired_flag_bytes[1]; + temp_bytes[2] = desired_flag_bytes[2]; + } + let desired_flags = u32::from_be_bytes(temp_bytes); + + if let Some(box_flags) = box_info.flags { + let exact = bmff_exclusion.exact.unwrap_or(true); + + if exact { + if desired_flags != box_flags { + continue; + } + } else { + // bitwise match + if (desired_flags | box_flags) != desired_flags { + continue; + } + } + } + } + + // check data match + if let Some(data_map_vec) = &bmff_exclusion.data { + let mut should_add = true; + + for data_map in data_map_vec { + // move to the start of exclusion + skip_bytes_to(reader, box_start + data_map.offset as u64)?; + + // match the data + let mut buf = vec![0u8; data_map.value.len()]; + reader.read_exact(&mut buf)?; + + 
// does not match so skip + if !vec_compare(&data_map.value, &buf) { + should_add = false; + break; + } + } + if !should_add { + continue; + } + } + + // reduce range if desired + if let Some(subset_vec) = &bmff_exclusion.subset { + for subset in subset_vec { + let exclusion = HashRange::new( + (exclusion_start + subset.offset as u64) as usize, + (if subset.length == 0 { + exclusion_length - subset.offset as u64 + } else { + min(subset.length as u64, exclusion_length) + }) as usize, + ); + + exclusions.push(exclusion); + } + } else { + // exclude box in its entirty + let exclusion = + HashRange::new(exclusion_start as usize, exclusion_length as usize); + + exclusions.push(exclusion); + + // for BMFF V2 hashes we do not add hash offsets for top level boxes + // that are completely excluded, so remove from BMFF V2 hash offset calc + if let Some(pos) = tl_offsets.iter().position(|x| *x == exclusion_start) { + tl_offsets.remove(pos); + } + } + } + } + } + + // add remaining top level offsets to be included when generating BMFF V2 hashes + // note: this is technically not an exclusion but a replacement with a new range of bytes to be hashed + if bmff_v2 { + for tl_start in tl_offsets { + let mut exclusion = HashRange::new(tl_start as usize, 1); + exclusion.set_bmff_offset(tl_start); + + exclusions.push(exclusion); + } + } + + Ok(exclusions) +} + +// `iloc`, `stco` and `co64` elements contain absolute file offsets so they need to be adjusted based on whether content was added or removed. 
+// todo: when fragment support is added adjust these (/moof/iloc, /moof/mfro, /moof/traf/saio, /sidx) +fn adjust_known_offsets( + output: &mut W, + bmff_tree: &Arena, + bmff_path_map: &HashMap>, + adjust: i32, +) -> Result<()> { + let start_pos = output.stream_position()?; // save starting point + + // handle 32 bit offsets + if let Some(stco_list) = bmff_path_map.get("/moov/trak/mdia/minf/stbl/stco") { + for stco_token in stco_list { + let stco_box_info = &bmff_tree[*stco_token].data; + if stco_box_info.box_type != BoxType::StcoBox { + return Err(Error::InvalidAsset("Bad BMFF".to_string())); + } + + // read stco box and patch + output.seek(SeekFrom::Start(stco_box_info.offset))?; + + // read header + let header = BoxHeaderLite::read(output) + .map_err(|_err| Error::InvalidAsset("Bad BMFF".to_string()))?; + if header.name != BoxType::StcoBox { + return Err(Error::InvalidAsset("Bad BMFF".to_string())); + } + + // read extended header + let (_version, _flags) = read_box_header_ext(output)?; // box extensions + + // get count of offsets + let entry_count = output.read_u32::()?; + + // read and patch offsets + let entry_start_pos = output.stream_position()?; + let mut entries: Vec = Vec::new(); + for _e in 0..entry_count { + let offset = output.read_u32::()?; + let new_offset = if adjust < 0 { + offset + - u32::try_from(adjust.abs()).map_err(|_| { + Error::InvalidAsset("Bad BMFF offset adjustment".to_string()) + })? + } else { + offset + + u32::try_from(adjust).map_err(|_| { + Error::InvalidAsset("Bad BMFF offset adjustment".to_string()) + })? 
+ }; + entries.push(new_offset); + } + + // write updated offsets + output.seek(SeekFrom::Start(entry_start_pos))?; + for e in entries { + output.write_u32::(e)?; + } + } + } + + // handle 64 offsets + if let Some(co64_list) = bmff_path_map.get("/moov/trak/mdia/minf/stbl/co64") { + for co64_token in co64_list { + let co64_box_info = &bmff_tree[*co64_token].data; + if co64_box_info.box_type != BoxType::Co64Box { + return Err(Error::InvalidAsset("Bad BMFF".to_string())); + } + + // read co64 box and patch + output.seek(SeekFrom::Start(co64_box_info.offset))?; + + // read header + let header = BoxHeaderLite::read(output) + .map_err(|_err| Error::InvalidAsset("Bad BMFF".to_string()))?; + if header.name != BoxType::Co64Box { + return Err(Error::InvalidAsset("Bad BMFF".to_string())); + } + + // read extended header + let (_version, _flags) = read_box_header_ext(output)?; // box extensions + + // get count of offsets + let entry_count = output.read_u32::()?; + + // read and patch offsets + let entry_start_pos = output.stream_position()?; + let mut entries: Vec = Vec::new(); + for _e in 0..entry_count { + let offset = output.read_u64::()?; + let new_offset = if adjust < 0 { + offset + - u64::try_from(adjust.abs()).map_err(|_| { + Error::InvalidAsset("Bad BMFF offset adjustment".to_string()) + })? + } else { + offset + + u64::try_from(adjust).map_err(|_| { + Error::InvalidAsset("Bad BMFF offset adjustment".to_string()) + })? 
+ }; + entries.push(new_offset); + } + + // write updated offsets + output.seek(SeekFrom::Start(entry_start_pos))?; + for e in entries { + output.write_u64::(e)?; + } + } + } + + // handle meta iloc + if let Some(iloc_list) = bmff_path_map.get("/meta/iloc") { + for iloc_token in iloc_list { + let iloc_box_info = &bmff_tree[*iloc_token].data; + if iloc_box_info.box_type != BoxType::IlocBox { + return Err(Error::InvalidAsset("Bad BMFF".to_string())); + } + + // read iloc box and patch + output.seek(SeekFrom::Start(iloc_box_info.offset))?; + + // read header + let header = BoxHeaderLite::read(output) + .map_err(|_err| Error::InvalidAsset("Bad BMFF".to_string()))?; + if header.name != BoxType::IlocBox { + return Err(Error::InvalidAsset("Bad BMFF".to_string())); + } + + // read extended header + let (version, _flags) = read_box_header_ext(output)?; // box extensions + + // read next 16 bits (in file byte order) + let mut iloc_header = [0u8, 2]; + output.read_exact(&mut iloc_header)?; + + // get offset size (high nibble) + let offset_size: u8 = (iloc_header[0] & 0xf0) >> 4; + + // get length size (low nibble) + let length_size: u8 = iloc_header[0] & 0x0f; + + // get box offset size (high nibble) + let base_offset_size: u8 = (iloc_header[1] & 0xf0) >> 4; + + // get index size (low nibble) + let index_size: u8 = iloc_header[1] & 0x0f; + + // get item count + let item_count = match version { + _v if version < 2 => output.read_u16::()? as u32, + _v if version == 2 => output.read_u32::()?, + _ => { + return Err(Error::InvalidAsset( + "Bad BMFF unknown iloc format".to_string(), + )) + } + }; + + // walk the iloc items and patch + for _i in 0..item_count { + // read item id + let _item_id = match version { + _v if version < 2 => output.read_u16::()? 
as u32, + 2 => output.read_u32::()?, + _ => { + return Err(Error::InvalidAsset( + "Bad BMFF: unknown iloc item".to_string(), + )) + } + }; + + // read construction method + let construction_method = if version == 1 || version == 2 { + let mut cm_bytes = [0u8, 2]; + output.read_exact(&mut cm_bytes)?; + + // lower nibble of 2nd byte + cm_bytes[1] & 0x0f + } else { + 0 + }; + + // read data reference index + let _data_reference_index = output.read_u16::()?; + + let base_offset_file_pos = output.stream_position()?; + let base_offset = match base_offset_size { + 0 => 0_u64, + 4 => output.read_u32::()? as u64, + 8 => output.read_u64::()?, + _ => { + return Err(Error::InvalidAsset( + "Bad BMFF: unknown iloc offset size".to_string(), + )) + } + }; + + // patch the offsets if needed + if construction_method == 0 { + // file offset construction method + if base_offset_size == 4 { + let new_offset = if adjust < 0 { + u32::try_from(base_offset).map_err(|_| { + Error::InvalidAsset("Bad BMFF offset adjustment".to_string()) + })? - u32::try_from(adjust.abs()).map_err(|_| { + Error::InvalidAsset("Bad BMFF offset adjustment".to_string()) + })? + } else { + u32::try_from(base_offset).map_err(|_| { + Error::InvalidAsset("Bad BMFF offset adjustment".to_string()) + })? + u32::try_from(adjust).map_err(|_| { + Error::InvalidAsset("Bad BMFF offset adjustment".to_string()) + })? + }; + + output.seek(SeekFrom::Start(base_offset_file_pos))?; + output.write_u32::(new_offset)?; + } + + if base_offset_size == 8 { + let new_offset = if adjust < 0 { + base_offset + - u64::try_from(adjust.abs()).map_err(|_| { + Error::InvalidAsset("Bad BMFF offset adjustment".to_string()) + })? + } else { + base_offset + + u64::try_from(adjust).map_err(|_| { + Error::InvalidAsset("Bad BMFF offset adjustment".to_string()) + })? 
+ }; + + output.seek(SeekFrom::Start(base_offset_file_pos))?; + output.write_u64::(new_offset)?; + } + } + + // read extent count + let extent_count = output.read_u16::()?; + + // consume the extents + for _e in 0..extent_count { + let _extent_index = if version == 1 || (version == 2 && index_size > 0) { + match base_offset_size { + 4 => Some(output.read_u32::()? as u64), + 8 => Some(output.read_u64::()?), + _ => None, + } + } else { + None + }; + + let extent_offset_file_pos = output.stream_position()?; + let extent_offset = match offset_size { + 0 => 0_u64, + 4 => output.read_u32::()? as u64, + 8 => output.read_u64::()?, + _ => { + return Err(Error::InvalidAsset( + "Bad BMFF: unknown iloc extent_offset size".to_string(), + )) + } + }; + + // no base offset so just adjust the raw extent_offset value + if construction_method == 0 && base_offset == 0 && extent_offset != 0 { + output.seek(SeekFrom::Start(extent_offset_file_pos))?; + match offset_size { + 4 => { + let new_offset = if adjust < 0 { + extent_offset as u32 + - u32::try_from(adjust.abs()).map_err(|_| { + Error::InvalidAsset( + "Bad BMFF offset adjustment".to_string(), + ) + })? + } else { + extent_offset as u32 + + u32::try_from(adjust).map_err(|_| { + Error::InvalidAsset( + "Bad BMFF offset adjustment".to_string(), + ) + })? + }; + output.write_u32::(new_offset)?; + } + 8 => { + let new_offset = if adjust < 0 { + extent_offset + - u64::try_from(adjust.abs()).map_err(|_| { + Error::InvalidAsset( + "Bad BMFF offset adjustment".to_string(), + ) + })? + } else { + extent_offset + + u64::try_from(adjust).map_err(|_| { + Error::InvalidAsset( + "Bad BMFF offset adjustment".to_string(), + ) + })? + }; + output.write_u64::(new_offset)?; + } + _ => { + return Err(Error::InvalidAsset( + "Bad BMFF: unknown extent_offset format".to_string(), + )) + } + } + } + + let _extent_length = match length_size { + 0 => 0_u64, + 4 => output.read_u32::()? 
as u64, + 8 => output.read_u64::()?, + _ => { + return Err(Error::InvalidAsset( + "Bad BMFF: unknown iloc offset size".to_string(), + )) + } + }; + } + } + } + } + + // restore seek point + output.seek(SeekFrom::Start(start_pos))?; + output.flush()?; + + Ok(()) +} + +pub(crate) fn build_bmff_tree( + reader: &mut R, + end: u64, + bmff_tree: &mut Arena, + current_node: &Token, + bmff_path_map: &mut HashMap>, +) -> Result<()> { + let start = reader.stream_position()?; + + let mut current = start; + while current < end { + // Get box header. + let header = BoxHeaderLite::read(reader) + .map_err(|_err| Error::InvalidAsset("Bad BMFF".to_string()))?; + + // Break if size zero BoxHeader + let s = header.size; + if s == 0 { + break; + } + + // Match and parse the supported atom boxes. + match header.name { + BoxType::UuidBox => { + let start = box_start(reader, header.large_size)?; + + let mut extended_type = [0u8; 16]; // 16 bytes of UUID + reader.read_exact(&mut extended_type)?; + + let (version, flags) = read_box_header_ext(reader)?; + + let b = BoxInfo { + path: header.fourcc.clone(), + offset: start, + size: s, + box_type: BoxType::UuidBox, + parent: Some(*current_node), + user_type: Some(extended_type.to_vec()), + version: Some(version), + flags: Some(flags), + }; + + let new_token = current_node.append(bmff_tree, b); + + let path = path_from_token(bmff_tree, &new_token)?; + add_token_to_cache(bmff_path_map, path, new_token); + + // position seek pointer + skip_bytes_to(reader, start + s)?; + } + // container box types + BoxType::MoovBox + | BoxType::TrakBox + | BoxType::MdiaBox + | BoxType::MinfBox + | BoxType::StblBox + | BoxType::MoofBox + | BoxType::TrafBox + | BoxType::EdtsBox + | BoxType::UdtaBox + | BoxType::DinfBox + | BoxType::TrefBox + | BoxType::TregBox + | BoxType::MvexBox + | BoxType::MfraBox + | BoxType::MetaBox + | BoxType::SchiBox => { + let start = box_start(reader, header.large_size)?; + + let b = if FULL_BOX_TYPES.contains(&header.fourcc.as_str()) { 
+ let (version, flags) = read_box_header_ext(reader)?; // box extensions + BoxInfo { + path: header.fourcc.clone(), + offset: start, + size: s, + box_type: header.name, + parent: Some(*current_node), + user_type: None, + version: Some(version), + flags: Some(flags), + } + } else { + BoxInfo { + path: header.fourcc.clone(), + offset: start, + size: s, + box_type: header.name, + parent: Some(*current_node), + user_type: None, + version: None, + flags: None, + } + }; + + let new_token = bmff_tree.new_node(b); + current_node + .append_node(bmff_tree, new_token) + .map_err(|_err| Error::InvalidAsset("Bad BMFF Graph".to_string()))?; + + let path = path_from_token(bmff_tree, &new_token)?; + add_token_to_cache(bmff_path_map, path, new_token); + + // consume all sub-boxes + let mut current = reader.stream_position()?; + let end = start + s; + while current < end { + build_bmff_tree(reader, end, bmff_tree, &new_token, bmff_path_map)?; + current = reader.stream_position()?; + } + + // position seek pointer + skip_bytes_to(reader, start + s)?; + } + _ => { + let start = box_start(reader, header.large_size)?; + + let b = if FULL_BOX_TYPES.contains(&header.fourcc.as_str()) { + let (version, flags) = read_box_header_ext(reader)?; // box extensions + BoxInfo { + path: header.fourcc.clone(), + offset: start, + size: s, + box_type: header.name, + parent: Some(*current_node), + user_type: None, + version: Some(version), + flags: Some(flags), + } + } else { + BoxInfo { + path: header.fourcc.clone(), + offset: start, + size: s, + box_type: header.name, + parent: Some(*current_node), + user_type: None, + version: None, + flags: None, + } + }; + + let new_token = current_node.append(bmff_tree, b); + + let path = path_from_token(bmff_tree, &new_token)?; + add_token_to_cache(bmff_path_map, path, new_token); + + // position seek pointer + skip_bytes_to(reader, start + s)?; + } + } + current = reader.stream_position()?; + } + + Ok(()) +} + +fn get_uuid_token( + bmff_tree: &Arena, + bmff_map: 
&HashMap>, + uuid: &[u8; 16], +) -> Option { + if let Some(uuid_list) = bmff_map.get("/uuid") { + for uuid_token in uuid_list { + let box_info = &bmff_tree[*uuid_token]; + + // make sure it is UUID box + if box_info.data.box_type == BoxType::UuidBox { + if let Some(found_uuid) = &box_info.data.user_type { + // make sure uuids match + if vec_compare(uuid, found_uuid) { + return Some(*uuid_token); + } + } + } + } + } + None +} + +pub(crate) struct C2PABmffBoxes { + pub manifest_bytes: Option>, + pub bmff_merkle: Vec, + pub box_infos: Vec, + pub xmp: Option, +} + +pub(crate) fn read_bmff_c2pa_boxes(reader: &mut dyn CAIRead) -> Result { + let size = reader.seek(SeekFrom::End(0))?; + reader.rewind()?; + + // create root node + let root_box = BoxInfo { + path: "".to_string(), + offset: 0, + size, + box_type: BoxType::Empty, + parent: None, + user_type: None, + version: None, + flags: None, + }; + + let (mut bmff_tree, root_token) = Arena::with_data(root_box); + let mut bmff_map: HashMap> = HashMap::new(); + + // build layout of the BMFF structure + build_bmff_tree(reader, size, &mut bmff_tree, &root_token, &mut bmff_map)?; + + let mut output: Option> = None; + let mut xmp: Option = None; + let mut _first_aux_uuid = 0; + let mut merkle_boxes: Vec = Vec::new(); + + // grab top level (for now) C2PA box + if let Some(uuid_list) = bmff_map.get("/uuid") { + let mut manifest_store_cnt = 0; + + for uuid_token in uuid_list { + let box_info = &bmff_tree[*uuid_token]; + + // make sure it is UUID box + if box_info.data.box_type == BoxType::UuidBox { + if let Some(uuid) = &box_info.data.user_type { + // make sure it is a C2PA ContentProvenanceBox box + if vec_compare(&C2PA_UUID, uuid) { + let mut data_len = box_info.data.size - HEADER_SIZE - 16 /*UUID*/; + + // set reader to start of box contents + skip_bytes_to(reader, box_info.data.offset + HEADER_SIZE + 16)?; + + // Fullbox => 8 bits for version 24 bits for flags + let (_version, _flags) = read_box_header_ext(reader)?; + data_len 
-= 4; + + // get the purpose + let mut purpose = Vec::with_capacity(64); + loop { + let mut buf = [0; 1]; + reader.read_exact(&mut buf)?; + data_len -= 1; + if buf[0] == 0x00 { + break; + } else { + purpose.push(buf[0]); + } + } + + // is the purpose manifest? + if vec_compare(&purpose, MANIFEST.as_bytes()) { + // offset to first aux uuid with purpose merkle + let mut buf = [0u8; 8]; + reader.read_exact(&mut buf)?; + data_len -= 8; + + // offset to first aux uuid + let offset = u64::from_be_bytes(buf); + + // read the manifest + if manifest_store_cnt == 0 { + let mut manifest = vec![0u8; data_len as usize]; + reader.read_exact(&mut manifest)?; + output = Some(manifest); + + manifest_store_cnt += 1; + } else { + return Err(Error::TooManyManifestStores); + } + + // if contains offset this asset contains additional UUID boxes + if offset != 0 { + _first_aux_uuid = offset; + } + } else if vec_compare(&purpose, MERKLE.as_bytes()) { + let mut merkle = vec![0u8; data_len as usize]; + reader.read_exact(&mut merkle)?; + + // strip trailing zeros + loop { + if !merkle.is_empty() && merkle[merkle.len() - 1] == 0 { + merkle.pop(); + } + + if merkle.is_empty() || merkle[merkle.len() - 1] != 0 { + break; + } + } + + // find uuid from uuid list + let mm: BmffMerkleMap = serde_cbor::from_slice(&merkle)?; + merkle_boxes.push(mm); + } + } else if vec_compare(&XMP_UUID, uuid) { + let data_len = box_info.data.size - HEADER_SIZE - 16 /*UUID*/; + + // set reader to start of box contents + skip_bytes_to(reader, box_info.data.offset + HEADER_SIZE + 16)?; + + let mut xmp_vec = vec![0u8; data_len as usize]; + reader.read_exact(&mut xmp_vec)?; + + if let Ok(xmp_string) = String::from_utf8(xmp_vec) { + xmp = Some(xmp_string); + } + } + } + } + } + } + + // get position ordered list of boxes + let mut box_infos: Vec = get_top_level_boxes(&bmff_tree, &bmff_map); + box_infos.sort_by(|a, b| a.offset.cmp(&b.offset)); + + Ok(C2PABmffBoxes { + manifest_bytes: output, + bmff_merkle: merkle_boxes, + 
box_infos, + xmp, + }) +} + +impl CAIReader for BmffIO { + fn read_cai(&self, reader: &mut dyn CAIRead) -> Result> { + let c2pa_boxes = read_bmff_c2pa_boxes(reader)?; + + c2pa_boxes.manifest_bytes.ok_or(Error::JumbfNotFound) + } + + // Get XMP block + fn read_xmp(&self, reader: &mut dyn CAIRead) -> Option { + let c2pa_boxes = read_bmff_c2pa_boxes(reader).ok()?; + + c2pa_boxes.xmp + } +} + +impl AssetIO for BmffIO { + fn asset_patch_ref(&self) -> Option<&dyn AssetPatch> { + Some(self) + } + + fn read_cai_store(&self, asset_path: &Path) -> Result> { + let mut f = File::open(asset_path)?; + self.read_cai(&mut f) + } + + fn save_cai_store(&self, asset_path: &std::path::Path, store_bytes: &[u8]) -> Result<()> { + let mut input_stream = std::fs::OpenOptions::new() + .read(true) + .open(asset_path) + .map_err(Error::IoError)?; + + let mut temp_file = Builder::new() + .prefix("c2pa_temp") + .rand_bytes(5) + .tempfile()?; + + self.write_cai(&mut input_stream, &mut temp_file, store_bytes)?; + + // copy temp file to asset + rename_or_move(temp_file, asset_path) + } + + fn get_object_locations( + &self, + _asset_path: &std::path::Path, + ) -> Result> { + let vec: Vec = Vec::new(); + Ok(vec) + } + + fn remove_cai_store(&self, asset_path: &Path) -> Result<()> { + let mut input_file = std::fs::File::open(asset_path)?; + + let mut temp_file = Builder::new() + .prefix("c2pa_temp") + .rand_bytes(5) + .tempfile()?; + + self.remove_cai_store_from_stream(&mut input_file, &mut temp_file)?; + + // copy temp file to asset + rename_or_move(temp_file, asset_path) + } + + fn new(asset_type: &str) -> Self + where + Self: Sized, + { + BmffIO { + bmff_format: asset_type.to_string(), + } + } + + fn get_handler(&self, asset_type: &str) -> Box { + Box::new(BmffIO::new(asset_type)) + } + + fn get_reader(&self) -> &dyn CAIReader { + self + } + + fn get_writer(&self, asset_type: &str) -> Option> { + Some(Box::new(BmffIO::new(asset_type))) + } + + fn remote_ref_writer_ref(&self) -> Option<&dyn 
RemoteRefEmbed> { + Some(self) + } + + fn supported_types(&self) -> &[&str] { + &SUPPORTED_TYPES + } +} + +impl CAIWriter for BmffIO { + fn write_cai( + &self, + input_stream: &mut dyn CAIRead, + output_stream: &mut dyn CAIReadWrite, + store_bytes: &[u8], + ) -> Result<()> { + let size = input_stream.seek(SeekFrom::End(0))?; + input_stream.rewind()?; + + // create root node + let root_box = BoxInfo { + path: "".to_string(), + offset: 0, + size, + box_type: BoxType::Empty, + parent: None, + user_type: None, + version: None, + flags: None, + }; + + let (mut bmff_tree, root_token) = Arena::with_data(root_box); + let mut bmff_map: HashMap> = HashMap::new(); + + // build layout of the BMFF structure + build_bmff_tree( + input_stream, + size, + &mut bmff_tree, + &root_token, + &mut bmff_map, + )?; + + // get ftyp location + // start after ftyp + let ftyp_token = bmff_map.get("/ftyp").ok_or(Error::UnsupportedType)?; // todo check ftyps to make sure we support any special format requirements + let ftyp_info = &bmff_tree[ftyp_token[0]].data; + let ftyp_offset = ftyp_info.offset; + let ftyp_size = ftyp_info.size; + + // get position to insert c2pa + let (c2pa_start, c2pa_length) = + if let Some(c2pa_token) = get_uuid_token(&bmff_tree, &bmff_map, &C2PA_UUID) { + let uuid_info = &bmff_tree[c2pa_token].data; + + (uuid_info.offset, Some(uuid_info.size)) + } else { + ((ftyp_offset + ftyp_size), None) + }; + + let mut new_c2pa_box: Vec = Vec::with_capacity(store_bytes.len() * 2); + let merkle_data: &[u8] = &[]; // not yet supported + write_c2pa_box(&mut new_c2pa_box, store_bytes, true, merkle_data)?; + let new_c2pa_box_size = new_c2pa_box.len(); + + let (start, end) = if let Some(c2pa_length) = c2pa_length { + let start = usize::value_from(c2pa_start) + .map_err(|_err| Error::InvalidAsset("value out of range".to_string()))?; // get beginning of chunk which starts 4 bytes before label + + let end = usize::value_from(c2pa_start + c2pa_length) + .map_err(|_err| 
Error::InvalidAsset("value out of range".to_string()))?; + + (start, end) + } else { + // insert new c2pa + let end = usize::value_from(c2pa_start) + .map_err(|_err| Error::InvalidAsset("value out of range".to_string()))?; + + (end, end) + }; + + // write content before ContentProvenanceBox + input_stream.rewind()?; + let mut before_manifest = input_stream.take(start as u64); + std::io::copy(&mut before_manifest, output_stream)?; + + // write ContentProvenanceBox + output_stream.write_all(&new_c2pa_box)?; + + // calc offset adjustments + let offset_adjust: i32 = if end == 0 { + new_c2pa_box_size as i32 + } else { + // value could be negative if box is truncated + let existing_c2pa_box_size = end - start; + let pad_size: i32 = new_c2pa_box_size as i32 - existing_c2pa_box_size as i32; + pad_size + }; + + // write content after ContentProvenanceBox + input_stream.seek(SeekFrom::Start(end as u64))?; + std::io::copy(input_stream, output_stream)?; + + // Manipulating the UUID box means we may need some patch offsets if they are file absolute offsets. 
+ + // create root node + let root_box = BoxInfo { + path: "".to_string(), + offset: 0, + size, + box_type: BoxType::Empty, + parent: None, + user_type: None, + version: None, + flags: None, + }; + + // map box layout of current output file + let (mut output_bmff_tree, root_token) = Arena::with_data(root_box); + let mut output_bmff_map: HashMap> = HashMap::new(); + + let size = output_stream.seek(SeekFrom::End(0))?; + output_stream.rewind()?; + build_bmff_tree( + output_stream, + size, + &mut output_bmff_tree, + &root_token, + &mut output_bmff_map, + )?; + + // adjust offsets based on current layout + output_stream.rewind()?; + adjust_known_offsets( + output_stream, + &output_bmff_tree, + &output_bmff_map, + offset_adjust, + ) + } + + fn get_object_locations_from_stream( + &self, + _input_stream: &mut dyn CAIRead, + ) -> Result> { + let vec: Vec = Vec::new(); + Ok(vec) + } + + fn remove_cai_store_from_stream( + &self, + input_stream: &mut dyn CAIRead, + output_stream: &mut dyn CAIReadWrite, + ) -> Result<()> { + let size = input_stream.seek(SeekFrom::End(0))?; + input_stream.rewind()?; + + // create root node + let root_box = BoxInfo { + path: "".to_string(), + offset: 0, + size, + box_type: BoxType::Empty, + parent: None, + user_type: None, + version: None, + flags: None, + }; + + let (mut bmff_tree, root_token) = Arena::with_data(root_box); + let mut bmff_map: HashMap> = HashMap::new(); + + // build layout of the BMFF structure + build_bmff_tree( + input_stream, + size, + &mut bmff_tree, + &root_token, + &mut bmff_map, + )?; + + // get position of c2pa manifest + let (c2pa_start, c2pa_length) = + if let Some(c2pa_token) = get_uuid_token(&bmff_tree, &bmff_map, &C2PA_UUID) { + let uuid_info = &bmff_tree[c2pa_token].data; + + (uuid_info.offset, Some(uuid_info.size)) + } else { + input_stream.rewind()?; + std::io::copy(input_stream, output_stream)?; + return Ok(()); // no box to remove, propagate source to output + }; + + let (start, end) = if let Some(c2pa_length) = 
c2pa_length { + let start = usize::value_from(c2pa_start) + .map_err(|_err| Error::InvalidAsset("value out of range".to_string()))?; // get beginning of chunk which starts 4 bytes before label + + let end = usize::value_from(c2pa_start + c2pa_length) + .map_err(|_err| Error::InvalidAsset("value out of range".to_string()))?; + + (start, end) + } else { + return Err(Error::InvalidAsset("value out of range".to_string())); + }; + + // write content before ContentProvenanceBox + input_stream.rewind()?; + let mut before_manifest = input_stream.take(start as u64); + std::io::copy(&mut before_manifest, output_stream)?; + + // calc offset adjustments + // value will be negative since the box is truncated + let new_c2pa_box_size: i32 = 0; + let existing_c2pa_box_size = end - start; + let offset_adjust = new_c2pa_box_size - existing_c2pa_box_size as i32; + + // write content after ContentProvenanceBox + input_stream.seek(SeekFrom::Start(end as u64))?; + std::io::copy(input_stream, output_stream)?; + + // Manipulating the UUID box means we may need some patch offsets if they are file absolute offsets. 
+ + // create root node + let root_box = BoxInfo { + path: "".to_string(), + offset: 0, + size, + box_type: BoxType::Empty, + parent: None, + user_type: None, + version: None, + flags: None, + }; + + // map box layout of current output file + let (mut output_bmff_tree, root_token) = Arena::with_data(root_box); + let mut output_bmff_map: HashMap> = HashMap::new(); + + let size = output_stream.seek(SeekFrom::End(0))?; + output_stream.rewind()?; + build_bmff_tree( + output_stream, + size, + &mut output_bmff_tree, + &root_token, + &mut output_bmff_map, + )?; + + // adjust offsets based on current layout + output_stream.rewind()?; + adjust_known_offsets( + output_stream, + &output_bmff_tree, + &output_bmff_map, + offset_adjust, + ) + } +} + +impl AssetPatch for BmffIO { + fn patch_cai_store(&self, asset_path: &std::path::Path, store_bytes: &[u8]) -> Result<()> { + let mut asset = OpenOptions::new() + .write(true) + .read(true) + .create(false) + .open(asset_path)?; + let size = asset.seek(SeekFrom::End(0))?; + asset.rewind()?; + + // create root node + let root_box = BoxInfo { + path: "".to_string(), + offset: 0, + size, + box_type: BoxType::Empty, + parent: None, + user_type: None, + version: None, + flags: None, + }; + + let (mut bmff_tree, root_token) = Arena::with_data(root_box); + let mut bmff_map: HashMap> = HashMap::new(); + + // build layout of the BMFF structure + build_bmff_tree(&mut asset, size, &mut bmff_tree, &root_token, &mut bmff_map)?; + + // get position to insert c2pa + let (c2pa_start, c2pa_length) = if let Some(uuid_tokens) = bmff_map.get("/uuid") { + let uuid_info = &bmff_tree[uuid_tokens[0]].data; + + // is this a C2PA manifest + let is_c2pa = if let Some(uuid) = &uuid_info.user_type { + // make sure it is a C2PA box + vec_compare(&C2PA_UUID, uuid) + } else { + false + }; + + if is_c2pa { + (uuid_info.offset, Some(uuid_info.size)) + } else { + (0, None) + } + } else { + return Err(Error::InvalidAsset( + "patch_cai_store found no manifest store to 
patch.".to_string(), + )); + }; + + if let Some(manifest_length) = c2pa_length { + let mut new_c2pa_box: Vec = Vec::with_capacity(store_bytes.len() * 2); + let merkle_data: &[u8] = &[]; // not yet supported + write_c2pa_box(&mut new_c2pa_box, store_bytes, true, merkle_data)?; + let new_c2pa_box_size = new_c2pa_box.len(); + + if new_c2pa_box_size as u64 == manifest_length { + asset.seek(SeekFrom::Start(c2pa_start))?; + asset.write_all(&new_c2pa_box)?; + Ok(()) + } else { + Err(Error::InvalidAsset( + "patch_cai_store store size mismatch.".to_string(), + )) + } + } else { + Err(Error::InvalidAsset( + "patch_cai_store store size mismatch.".to_string(), + )) + } + } +} + +impl RemoteRefEmbed for BmffIO { + #[allow(unused_variables)] + fn embed_reference( + &self, + asset_path: &Path, + embed_ref: crate::asset_io::RemoteRefEmbedType, + ) -> Result<()> { + match embed_ref { + crate::asset_io::RemoteRefEmbedType::Xmp(manifest_uri) => { + let output_buf = Vec::new(); + let mut output_stream = Cursor::new(output_buf); + + // block so that source file is closed after embed + { + let mut source_stream = std::fs::File::open(asset_path)?; + self.embed_reference_to_stream( + &mut source_stream, + &mut output_stream, + RemoteRefEmbedType::Xmp(manifest_uri), + )?; + } + + // write will replace exisiting contents + std::fs::write(asset_path, output_stream.into_inner())?; + Ok(()) + } + crate::asset_io::RemoteRefEmbedType::StegoS(_) => Err(Error::UnsupportedType), + crate::asset_io::RemoteRefEmbedType::StegoB(_) => Err(Error::UnsupportedType), + crate::asset_io::RemoteRefEmbedType::Watermark(_) => Err(Error::UnsupportedType), + } + } + + fn embed_reference_to_stream( + &self, + input_stream: &mut dyn CAIRead, + output_stream: &mut dyn CAIReadWrite, + embed_ref: RemoteRefEmbedType, + ) -> Result<()> { + match embed_ref { + crate::asset_io::RemoteRefEmbedType::Xmp(manifest_uri) => { + let xmp = match self.get_reader().read_xmp(input_stream) { + Some(xmp) => add_provenance(&xmp, 
&manifest_uri)?, + None => { + let xmp = format!("http://ns.adobe.com/xap/1.0/\0 {}", MIN_XMP); + add_provenance(&xmp, &manifest_uri)? + } + }; + + let size = input_stream.seek(SeekFrom::End(0))?; + input_stream.rewind()?; + + // create root node + let root_box = BoxInfo { + path: "".to_string(), + offset: 0, + size, + box_type: BoxType::Empty, + parent: None, + user_type: None, + version: None, + flags: None, + }; + + let (mut bmff_tree, root_token) = Arena::with_data(root_box); + let mut bmff_map: HashMap> = HashMap::new(); + + // build layout of the BMFF structure + build_bmff_tree( + input_stream, + size, + &mut bmff_tree, + &root_token, + &mut bmff_map, + )?; + + // get ftyp location + // start after ftyp + let ftyp_token = bmff_map.get("/ftyp").ok_or(Error::UnsupportedType)?; // todo check ftyps to make sure we support any special format requirements + let ftyp_info = &bmff_tree[ftyp_token[0]].data; + let ftyp_offset = ftyp_info.offset; + let ftyp_size = ftyp_info.size; + + // get position to insert xmp + let (xmp_start, xmp_length) = + if let Some(c2pa_token) = get_uuid_token(&bmff_tree, &bmff_map, &XMP_UUID) { + let uuid_info = &bmff_tree[c2pa_token].data; + + (uuid_info.offset, Some(uuid_info.size)) + } else { + ((ftyp_offset + ftyp_size), None) + }; + + let mut new_xmp_box: Vec = Vec::with_capacity(xmp.len() * 2); + write_xmp_box(&mut new_xmp_box, xmp.as_bytes())?; + let new_xmp_box_size = new_xmp_box.len(); + + let (start, end) = if let Some(xmp_length) = xmp_length { + let start = usize::value_from(xmp_start) + .map_err(|_err| Error::InvalidAsset("value out of range".to_string()))?; // get beginning of chunk which starts 4 bytes before label + + let end = usize::value_from(xmp_start + xmp_length) + .map_err(|_err| Error::InvalidAsset("value out of range".to_string()))?; + + (start, end) + } else { + // insert new c2pa + let end = usize::value_from(xmp_start) + .map_err(|_err| Error::InvalidAsset("value out of range".to_string()))?; + + (end, end) + }; + 
+ // write content before XMP box + input_stream.rewind()?; + let mut before_manifest = input_stream.take(start as u64); + std::io::copy(&mut before_manifest, output_stream)?; + + // write ContentProvenanceBox + output_stream.write_all(&new_xmp_box)?; + + // calc offset adjustments + let offset_adjust: i32 = if end == 0 { + new_xmp_box_size as i32 + } else { + // value could be negative if box is truncated + let existing_xmp_box_size = end - start; + let pad_size: i32 = new_xmp_box_size as i32 - existing_xmp_box_size as i32; + pad_size + }; + + // write content after XMP box + input_stream.seek(SeekFrom::Start(end as u64))?; + std::io::copy(input_stream, output_stream)?; + + // Manipulating the UUID box means we may need some patch offsets if they are file absolute offsets. + + // create root node + let root_box = BoxInfo { + path: "".to_string(), + offset: 0, + size, + box_type: BoxType::Empty, + parent: None, + user_type: None, + version: None, + flags: None, + }; + + // map box layout of current output file + let (mut output_bmff_tree, root_token) = Arena::with_data(root_box); + let mut output_bmff_map: HashMap> = HashMap::new(); + + let size = output_stream.seek(SeekFrom::End(0))?; + output_stream.rewind()?; + build_bmff_tree( + output_stream, + size, + &mut output_bmff_tree, + &root_token, + &mut output_bmff_map, + )?; + + // adjust offsets based on current layout + output_stream.rewind()?; + adjust_known_offsets( + output_stream, + &output_bmff_tree, + &output_bmff_map, + offset_adjust, + ) + } + crate::asset_io::RemoteRefEmbedType::StegoS(_) => Err(Error::UnsupportedType), + crate::asset_io::RemoteRefEmbedType::StegoB(_) => Err(Error::UnsupportedType), + crate::asset_io::RemoteRefEmbedType::Watermark(_) => Err(Error::UnsupportedType), + } + } +} +#[cfg(test)] +pub mod tests { + #![allow(clippy::expect_used)] + #![allow(clippy::panic)] + #![allow(clippy::unwrap_used)] + + use tempfile::tempdir; + + use super::*; + use crate::utils::test::{fixture_path, 
temp_dir_path}; + + #[cfg(not(target_arch = "wasm32"))] + #[cfg(feature = "file_io")] + #[test] + fn test_read_mp4() { + use crate::{ + status_tracker::{report_split_errors, DetailedStatusTracker, StatusTracker}, + store::Store, + }; + + let ap = fixture_path("video1.mp4"); + + let mut log = DetailedStatusTracker::default(); + let store = Store::load_from_asset(&ap, true, &mut log); + + let errors = report_split_errors(log.get_log_mut()); + assert!(errors.is_empty()); + + if let Ok(s) = store { + print!("Store: \n{s}"); + } + } + + #[test] + fn test_xmp_write() { + let data = "some test data"; + let source = fixture_path("video1.mp4"); + + let temp_dir = tempdir().unwrap(); + let output = temp_dir_path(&temp_dir, "video1-out.mp4"); + + std::fs::copy(source, &output).unwrap(); + + let bmff = BmffIO::new("mp4"); + + let eh = bmff.remote_ref_writer_ref().unwrap(); + + eh.embed_reference(&output, RemoteRefEmbedType::Xmp(data.to_string())) + .unwrap(); + + let mut output_stream = std::fs::File::open(&output).unwrap(); + let xmp = bmff.get_reader().read_xmp(&mut output_stream).unwrap(); + + let loaded = crate::utils::xmp_inmemory_utils::extract_provenance(&xmp).unwrap(); + + assert_eq!(&loaded, data); + } + + #[test] + fn test_truncated_c2pa_write_mp4() { + let test_data = "some test data".as_bytes(); + let source = fixture_path("video1.mp4"); + + let mut success = false; + if let Ok(temp_dir) = tempdir() { + let output = temp_dir_path(&temp_dir, "mp4_test.mp4"); + + if let Ok(_size) = std::fs::copy(source, &output) { + let bmff = BmffIO::new("mp4"); + + //let test_data = bmff.read_cai_store(&source).unwrap(); + if let Ok(()) = bmff.save_cai_store(&output, test_data) { + if let Ok(read_test_data) = bmff.read_cai_store(&output) { + assert!(vec_compare(test_data, &read_test_data)); + success = true; + } + } + } + } + assert!(success) + } + + #[test] + fn test_expanded_c2pa_write_mp4() { + let mut more_data = "some more test data".as_bytes().to_vec(); + let source = 
fixture_path("video1.mp4"); + + let mut success = false; + if let Ok(temp_dir) = tempdir() { + let output = temp_dir_path(&temp_dir, "mp4_test.mp4"); + + if let Ok(_size) = std::fs::copy(&source, &output) { + let bmff = BmffIO::new("mp4"); + + if let Ok(mut test_data) = bmff.read_cai_store(&source) { + test_data.append(&mut more_data); + if let Ok(()) = bmff.save_cai_store(&output, &test_data) { + if let Ok(read_test_data) = bmff.read_cai_store(&output) { + assert!(vec_compare(&test_data, &read_test_data)); + success = true; + } + } + } + } + } + assert!(success) + } + + #[test] + fn test_patch_c2pa_write_mp4() { + let test_data = "some test data".as_bytes(); + let source = fixture_path("video1.mp4"); + + let mut success = false; + if let Ok(temp_dir) = tempdir() { + let output = temp_dir_path(&temp_dir, "mp4_test.mp4"); + + if let Ok(_size) = std::fs::copy(source, &output) { + let bmff = BmffIO::new("mp4"); + + if let Ok(source_data) = bmff.read_cai_store(&output) { + // create replacement data of same size + let mut new_data = vec![0u8; source_data.len()]; + new_data[..test_data.len()].copy_from_slice(test_data); + bmff.patch_cai_store(&output, &new_data).unwrap(); + + let replaced = bmff.read_cai_store(&output).unwrap(); + + assert_eq!(new_data, replaced); + + success = true; + } + } + } + assert!(success) + } + + #[test] + fn test_remove_c2pa() { + let source = fixture_path("video1.mp4"); + + let temp_dir = tempdir().unwrap(); + let output = temp_dir_path(&temp_dir, "mp4_test.mp4"); + + std::fs::copy(source, &output).unwrap(); + let bmff_io = BmffIO::new("mp4"); + + bmff_io.remove_cai_store(&output).unwrap(); + + // read back in asset, JumbfNotFound is expected since it was removed + match bmff_io.read_cai_store(&output) { + Err(Error::JumbfNotFound) => (), + _ => unreachable!(), + } + } +} diff --git a/sdk/crates/c2pa-codecs/src/codecs/c2pa.rs b/sdk/crates/c2pa-codecs/src/codecs/c2pa.rs new file mode 100644 index 000000000..4421e910b --- /dev/null +++ 
b/sdk/crates/c2pa-codecs/src/codecs/c2pa.rs @@ -0,0 +1,97 @@ +// Copyright 2022 Adobe. All rights reserved. +// This file is licensed to you under the Apache License, +// Version 2.0 (http://www.apache.org/licenses/LICENSE-2.0) +// or the MIT license (http://opensource.org/licenses/MIT), +// at your option. + +// Unless required by applicable law or agreed to in writing, +// this software is distributed on an "AS IS" BASIS, WITHOUT +// WARRANTIES OR REPRESENTATIONS OF ANY KIND, either express or +// implied. See the LICENSE-MIT and LICENSE-APACHE files for the +// specific language governing permissions and limitations under +// each license. + +use std::io::{Read, Seek, Write}; + +use crate::{ + ByteSpan, C2paSpan, CodecError, Decode, DefaultSpan, Embed, Embeddable, Encode, EncodeInPlace, + Span, Support, +}; + +/// Supports working with ".c2pa" files containing only manifest store data +#[derive(Debug)] +pub struct C2paCodec { + src: R, +} + +impl C2paCodec { + pub fn new(src: R) -> Self { + Self { src } + } +} + +impl Support for C2paCodec<()> { + const MAX_SIGNATURE_LEN: usize = 0; + + fn supports_extension(extension: &str) -> bool { + extension == "c2pa" + } + + fn supports_mime(mime: &str) -> bool { + mime == "application/c2pa" || mime == "application/x-c2pa-manifest-store" + } +} + +impl Decode for C2paCodec { + fn read_c2pa(&mut self) -> Result>, CodecError> { + let mut cai_data = Vec::new(); + // read the whole file + self.src.read_to_end(&mut cai_data)?; + Ok(Some(cai_data)) + } +} + +impl Encode for C2paCodec { + fn write_c2pa(&mut self, mut dst: impl Write, c2pa: &[u8]) -> Result<(), CodecError> { + // just write the store bytes and ingore the input stream + dst.write_all(c2pa)?; + Ok(()) + } + + fn remove_c2pa(&mut self, _dst: impl Write) -> Result { + // TODO: true or false? 
+ Ok(false) + } +} + +impl EncodeInPlace for C2paCodec { + fn patch_c2pa(&mut self, c2pa: &[u8]) -> Result<(), CodecError> { + self.src.write_all(c2pa)?; + Ok(()) + } +} + +impl Embed for C2paCodec { + fn embeddable(bytes: &[u8]) -> Result { + Ok(Embeddable { + bytes: bytes.to_vec(), + }) + } + + fn embed(&mut self, embeddable: Embeddable, mut dst: impl Write) -> Result<(), CodecError> { + dst.write_all(&embeddable.bytes)?; + Ok(()) + } +} + +impl Span for C2paCodec { + fn span(&mut self) -> Result { + todo!() + } + + fn c2pa_span(&mut self) -> Result { + Ok(C2paSpan { + spans: vec![ByteSpan { start: 0, len: 0 }], + }) + } +} diff --git a/sdk/crates/c2pa-codecs/src/codecs/gif.rs b/sdk/crates/c2pa-codecs/src/codecs/gif.rs new file mode 100644 index 000000000..7f675abe5 --- /dev/null +++ b/sdk/crates/c2pa-codecs/src/codecs/gif.rs @@ -0,0 +1,1269 @@ +// Copyright 2023 Adobe. All rights reserved. +// This file is licensed to you under the Apache License, +// Version 2.0 (http://www.apache.org/licenses/LICENSE-2.0) +// or the MIT license (http://opensource.org/licenses/MIT), +// at your option. + +// Unless required by applicable law or agreed to in writing, +// this software is distributed on an "AS IS" BASIS, WITHOUT +// WARRANTIES OR REPRESENTATIONS OF ANY KIND, either express or +// implied. See the LICENSE-MIT and LICENSE-APACHE files for the +// specific language governing permissions and limitations under +// each license. 
/// Reports whether `signature` starts with the three ASCII bytes `GIF`.
///
/// Inputs shorter than three bytes (for example a truncated or empty file) yield `false`
/// instead of panicking on an out-of-range slice.
fn supports_signature(signature: &[u8]) -> bool {
    signature.starts_with(b"GIF")
}
{ + Some(block_marker) => { + self.remove_block(&mut dst, &block_marker.into())?; + Ok(true) + } + None => Ok(false), + } + } + + fn write_xmp(&mut self, mut dst: impl Write, xmp: &str) -> Result<(), CodecError> { + let old_block_marker = self.find_xmp_block()?; + let new_block = ApplicationExtension::new_xmp(xmp.as_bytes().to_vec()); + + match old_block_marker { + Some(old_block_marker) => { + self.replace_block(&mut dst, &old_block_marker.into(), &new_block.into()) + } + None => self.insert_block(&mut dst, &new_block.into()), + } + } +} + +impl EncodeInPlace for GifCodec { + fn patch_c2pa(&mut self, c2pa: &[u8]) -> Result<(), CodecError> { + let mut codec = GifCodec::new(&mut self.src); + let old_block_marker = match codec.find_c2pa_block()? { + Some(old_block_marker) => old_block_marker, + None => return Err(CodecError::NothingToPatch), + }; + + let new_block = ApplicationExtension::new_c2pa(c2pa); + + Self::replace_block_in_place(&mut self.src, &old_block_marker.into(), &new_block.into()) + } +} + +// TODO: the methods below can be implemented much more conveniently within impl BlockMarker +impl GifCodec { + fn remove_block( + &mut self, + mut dst: impl Write, + block_meta: &BlockMarker, + ) -> Result<(), CodecError> { + self.src.rewind()?; + + let mut start_stream = self.src.by_ref().take(block_meta.start()); + io::copy(&mut start_stream, &mut dst)?; + + self.src.seek(SeekFrom::Current( + i64::try_from(block_meta.len()).map_err(CodecError::SeekOutOfBounds)?, + ))?; + io::copy(&mut self.src, &mut dst)?; + + Ok(()) + } + + fn replace_block( + &mut self, + mut dst: impl Write, + old_block_marker: &BlockMarker, + new_block: &Block, + ) -> Result<(), CodecError> { + self.src.rewind()?; + + // Write everything before the replacement block. + let mut start_stream = self.src.by_ref().take(old_block_marker.start()); + io::copy(&mut start_stream, &mut dst)?; + + dst.write_all(&new_block.to_bytes()?)?; + + // Write everything after the replacement block. 
+ self.src.seek(SeekFrom::Current( + i64::try_from(old_block_marker.len()).map_err(CodecError::SeekOutOfBounds)?, + ))?; + io::copy(&mut self.src, &mut dst)?; + + Ok(()) + } + + fn insert_block(&mut self, mut dst: impl Write, block: &Block) -> Result<(), CodecError> { + self.skip_preamble()?; + + // Position before any blocks start. + let end_preamble_pos = self.src.stream_position()?; + self.update_to_89a(&mut dst)?; + let after_update_pos = self.src.stream_position()?; + + let mut start_stream = self.src.by_ref().take(end_preamble_pos - after_update_pos); + io::copy(&mut start_stream, &mut dst)?; + + dst.write_all(&block.to_bytes()?)?; + + io::copy(&mut self.src, &mut dst)?; + + Ok(()) + } + + fn replace_block_in_place( + mut dst: impl Write + Seek, + old_block_marker: &BlockMarker, + new_block: &Block, + ) -> Result<(), CodecError> { + // TODO: if new_block len < old_block len, pad the new block + let new_bytes = new_block.to_bytes()?; + if new_bytes.len() as u64 != old_block_marker.len() { + return Err(CodecError::InvalidPatchSize { + expected: old_block_marker.len(), + actual: new_bytes.len() as u64, + }); + } + + dst.seek(SeekFrom::Start(old_block_marker.start()))?; + dst.write_all(&new_bytes)?; + + Ok(()) + } + + // GIF has two versions: 87a and 89a. 87a doesn't support block extensions, so if the input stream is + // 87a we need to update it to 89a. + fn update_to_89a(&mut self, mut dst: impl Write) -> Result<(), CodecError> { + self.src.rewind()?; + + let mut before = [0; 4]; + self.src.read_exact(&mut before)?; + dst.write_all(&before)?; + + // 0x39 is 9 in ASCII. + dst.write_u8(0x39)?; + self.src.seek(SeekFrom::Current(1))?; + + Ok(()) + } +} + +impl Decode for GifCodec { + fn read_c2pa(&mut self) -> Result>, CodecError> { + Ok(self + .find_c2pa_block()? + .map(|marker| marker.block.data_sub_blocks.to_decoded_bytes())) + } + + fn read_xmp(&mut self) -> Result, CodecError> { + let bytes = self + .find_xmp_block()? 
+ .map(|marker| marker.block.data_sub_blocks.to_decoded_bytes()); + match bytes { + Some(mut bytes) => { + // TODO: this should be validated on construction + // Validate the 258-byte XMP magic trailer (excluding terminator). + if let Some(byte) = bytes.get(bytes.len() - 257) { + if *byte != 1 { + return Err(CodecError::InvalidXmpBlock); + } + } + for (i, byte) in bytes.iter().rev().take(256).enumerate() { + if *byte != i as u8 { + return Err(CodecError::InvalidXmpBlock); + } + } + + bytes.truncate(bytes.len() - 257); + String::from_utf8(bytes) + .map(Some) + .map_err(|_| CodecError::InvalidXmpBlock) + } + None => Ok(None), + } + } +} + +impl Span for GifCodec { + fn span(&mut self) -> Result { + Ok(DefaultSpan::Data(self.c2pa_span()?)) + } + + fn c2pa_span(&mut self) -> Result { + let c2pa_block = self.find_c2pa_block()?; + match c2pa_block { + Some(c2pa_block) => Ok(C2paSpan { + spans: vec![ByteSpan { + start: c2pa_block.start(), + len: c2pa_block.len(), + }], + }), + None => { + self.skip_preamble()?; + + let end_preamble_pos = self.src.stream_position()?; + Ok(C2paSpan { + spans: vec![ByteSpan { + start: end_preamble_pos, + len: 1, // Need at least size 1. + }], + }) + } + } + } + + fn box_span(&mut self) -> Result { + let c2pa_block_exists = self.find_c2pa_block()?.is_some(); + + Blocks::new(&mut self.src)? + .try_fold( + (Vec::new(), None, 0), + |(mut named_spans, last_marker, mut offset), + marker| + -> Result<(Vec<_>, Option>, u64), CodecError> { + let marker = marker?; + + // If the C2PA block doesn't exist, we need to insert a placeholder after the global color table + // if it exists, or otherwise after the logical screen descriptor. + if !c2pa_block_exists { + if let Some(last_marker) = last_marker.as_ref() { + let should_insert_placeholder = match last_marker.block { + Block::GlobalColorTable(_) => true, + // If the current block is a global color table, then wait til the next iteration to insert. 
+ Block::LogicalScreenDescriptor(_) + if !matches!(marker.block, Block::GlobalColorTable(_)) => + { + true + } + _ => false, + }; + if should_insert_placeholder { + offset += 1; + named_spans.push( + BlockMarker { + block: Block::ApplicationExtension( + ApplicationExtension::new_c2pa(&[]), + ), + start: marker.start, + len: 1, + } + .to_named_byte_span()?, + ); + } + } + } + + // According to C2PA spec, these blocks must be grouped into the same box map. + match marker.block { + // If it's a local color table, then an image descriptor MUST have come before it. + // If it's a global color table, then a logical screen descriptor MUST have come before it. + Block::LocalColorTable(_) | Block::GlobalColorTable(_) => { + match named_spans.last_mut() { + Some(last_named_span) => last_named_span.span.len += marker.len(), + // Realistically, this case is unreachable, but to play it safe, we error. + None => { + return Err(CodecError::InvalidAsset { + src: None, + context: "TODO".to_string(), + }) + } + } + } + _ => { + let mut named_span = marker.to_named_byte_span()?; + named_span.span.start += offset; + named_spans.push(named_span); + } + } + Ok((named_spans, Some(marker), offset)) + }, + ) + .map(|(named_spans, _, _)| BoxSpan { spans: named_spans }) + } +} + +impl GifCodec { + fn skip_preamble(&mut self) -> Result<(), CodecError> { + self.src.rewind()?; + + Header::from_stream(&mut self.src)?; + let logical_screen_descriptor = LogicalScreenDescriptor::from_stream(&mut self.src)?; + if logical_screen_descriptor.color_table_flag { + GlobalColorTable::from_stream( + &mut self.src, + logical_screen_descriptor.color_resolution, + )?; + } + + Ok(()) + } + + // According to spec, C2PA blocks must come before the first image descriptor. 
+ fn find_c2pa_block(&mut self) -> Result>, CodecError> { + Self::find_app_block_from_iterator( + ApplicationExtensionKind::C2pa, + Blocks::new(&mut self.src)?.take_while(|marker| { + !matches!( + marker, + Ok(BlockMarker { + block: Block::ImageDescriptor(_), + .. + }) + ) + }), + ) + } + + fn find_xmp_block(&mut self) -> Result>, CodecError> { + Self::find_app_block_from_iterator( + ApplicationExtensionKind::Xmp, + Blocks::new(&mut self.src)?, + ) + } + + fn find_app_block_from_iterator( + kind: ApplicationExtensionKind, + mut iterator: impl Iterator, CodecError>>, + ) -> Result>, CodecError> { + iterator + .find_map(|marker| match marker { + Ok(marker) => match marker.block { + Block::ApplicationExtension(app_ext) if app_ext.kind() == kind => { + Some(Ok(BlockMarker { + start: marker.start, + len: marker.len, + block: app_ext, + })) + } + _ => None, + }, + Err(err) => Some(Err(err)), + }) + .transpose() + } +} + +struct Blocks { + next: Option>, + stream: R, + reached_trailer: bool, +} + +impl Blocks { + fn new(mut stream: R) -> Result, CodecError> { + stream.rewind()?; + + let start = stream.stream_position()?; + let block = Block::Header(Header::from_stream(&mut stream)?); + let end = stream.stream_position()?; + + Ok(Blocks { + next: Some(BlockMarker { + len: end - start, + start, + block, + }), + stream, + reached_trailer: false, + }) + } + + fn parse_next(&mut self) -> Result, CodecError> { + match self.next.take() { + Some(marker) => { + self.next = marker.block.next_block_hint(&mut self.stream)?; + Ok(marker) + } + None => { + let marker = Block::from_stream(&mut self.stream)?; + self.next = marker.block.next_block_hint(&mut self.stream)?; + + if let Block::Trailer = marker.block { + self.reached_trailer = true; + } + + Ok(marker) + } + } + } +} + +impl Iterator for Blocks { + type Item = Result, CodecError>; + + fn next(&mut self) -> Option { + match self.reached_trailer { + true => None, + false => match self.parse_next() { + Ok(marker) => 
Some(Ok(marker)), + Err(err) => Some(Err(err)), + }, + } + } +} + +#[derive(Debug, Clone, PartialEq)] +struct BlockMarker { + start: u64, + len: u64, + block: T, +} + +impl BlockMarker { + fn len(&self) -> u64 { + self.len + } + + fn start(&self) -> u64 { + self.start + } +} + +impl BlockMarker { + fn to_named_byte_span(&self) -> Result { + let mut names = Vec::new(); + if let Some(name) = self.block.box_id() { + names.push(name.to_owned()); + } + + Ok(NamedByteSpan { + names, + span: ByteSpan { + start: self.start(), + len: self.len(), + }, + }) + } +} + +impl From> for BlockMarker { + fn from(value: BlockMarker) -> Self { + BlockMarker { + start: value.start, + len: value.len, + block: Block::ApplicationExtension(value.block), + } + } +} + +#[derive(Debug, Clone, PartialEq)] +enum Block { + Header(Header), + LogicalScreenDescriptor(LogicalScreenDescriptor), + GlobalColorTable(GlobalColorTable), + GraphicControlExtension(GraphicControlExtension), + PlainTextExtension(PlainTextExtension), + ApplicationExtension(ApplicationExtension), + CommentExtension(CommentExtension), + ImageDescriptor(ImageDescriptor), + LocalColorTable(LocalColorTable), + ImageData(ImageData), + Trailer, +} + +impl Block { + fn from_stream(mut stream: impl Read + Seek) -> Result, CodecError> { + let start = stream.stream_position()?; + + let ext_introducer = stream.read_u8()?; + let block = match ext_introducer { + 0x21 => { + let ext_label = stream.read_u8()?; + match ext_label { + 0xff => Ok(Block::ApplicationExtension( + ApplicationExtension::from_stream(&mut stream)?, + )), + 0xfe => Ok(Block::CommentExtension(CommentExtension::from_stream( + &mut stream, + )?)), + 0xf9 => Ok(Block::GraphicControlExtension( + GraphicControlExtension::from_stream(&mut stream)?, + )), + 0x21 => Ok(Block::PlainTextExtension(PlainTextExtension::from_stream( + &mut stream, + )?)), + ext_label => Err(CodecError::InvalidAsset { + src: None, + context: format!("Invalid block extension label: {ext_label}"), + }), + 
} + } + 0x2c => Ok(Block::ImageDescriptor(ImageDescriptor::from_stream( + &mut stream, + )?)), + 0x3b => Ok(Block::Trailer), + ext_introducer => Err(CodecError::InvalidAsset { + src: None, + context: format!("Invalid block id: {ext_introducer}"), + }), + }?; + + let end = stream.stream_position()?; + Ok(BlockMarker { + start, + len: end - start, + block, + }) + } + + // Some blocks MUST come after other blocks, this function ensures that. + fn next_block_hint( + &self, + mut stream: impl Read + Seek, + ) -> Result>, CodecError> { + let start = stream.stream_position()?; + let next_block = match self { + Block::Header(_) => Some(Block::LogicalScreenDescriptor( + LogicalScreenDescriptor::from_stream(&mut stream)?, + )), + Block::LogicalScreenDescriptor(logical_screen_descriptor) => { + match logical_screen_descriptor.color_table_flag { + true => Some(Block::GlobalColorTable(GlobalColorTable::from_stream( + &mut stream, + logical_screen_descriptor.color_resolution, + )?)), + false => None, + } + } + Block::GlobalColorTable(_) => None, + // Block::GraphicControlExtension(_) => match stream.read_u8()? { + // 0x21 => match stream.read_u8()? { + // 0x01 => Some(Block::PlainTextExtension(PlainTextExtension::from_stream( + // stream, + // )?)), + // ext_label => { + // return Err(Error::InvalidAsset(format!( + // "Block extension `{ext_label}` cannot come after graphic control extension" + // ))) + // } + // }, + // 0x2c => Some(Block::ImageDescriptor(ImageDescriptor::from_stream( + // stream, + // )?)), + // ext_introducer => { + // return Err(Error::InvalidAsset(format!( + // "Block id `{ext_introducer}` cannot come after graphic control extension" + // ))) + // } + // }, + // In a valid GIF, a plain text extension or image descriptor MUST come after a graphic control extension. + // However, it turns out not even our sample GIF follows this restriction! 
Since we don't really care about + // the correctness of the GIF (more so that our modifications are correct), we ignore this restriction. + Block::GraphicControlExtension(_) => None, + Block::PlainTextExtension(_) => None, + Block::ApplicationExtension(_) => None, + Block::CommentExtension(_) => None, + Block::ImageDescriptor(image_descriptor) => { + match image_descriptor.local_color_table_flag { + true => Some(Block::LocalColorTable(LocalColorTable::from_stream( + &mut stream, + image_descriptor.local_color_table_size, + )?)), + false => Some(Block::ImageData(ImageData::from_stream(&mut stream)?)), + } + } + Block::LocalColorTable(_) => { + Some(Block::ImageData(ImageData::from_stream(&mut stream)?)) + } + Block::ImageData(_) => None, + Block::Trailer => None, + }; + + let end = stream.stream_position()?; + Ok(next_block.map(|block| BlockMarker { + len: end - start, + start, + block, + })) + } + + fn box_id(&self) -> Option<&'static str> { + match self { + Block::Header(_) => Some("GIF89a"), + Block::LogicalScreenDescriptor(_) => Some("LSD"), + Block::GlobalColorTable(_) => None, + Block::GraphicControlExtension(_) => Some("21F9"), + Block::PlainTextExtension(_) => Some("2101"), + Block::ApplicationExtension(application_extension) => { + match ApplicationExtensionKind::C2pa == application_extension.kind() { + true => Some("C2PA"), + false => Some("21FF"), + } + } + Block::CommentExtension(_) => Some("21FE"), + Block::ImageDescriptor(_) => Some("2C"), + Block::LocalColorTable(_) => None, + Block::ImageData(_) => Some("TBID"), + Block::Trailer => Some("3B"), + } + } + + fn to_bytes(&self) -> Result, CodecError> { + match self { + Block::ApplicationExtension(app_ext) => Ok(app_ext.to_bytes()), + // We only care about app extensions. 
+ _ => Err(CodecError::Unsupported), + } + } +} + +#[derive(Debug, Clone, PartialEq)] +struct Header {} + +impl Header { + fn from_stream(mut stream: impl Read + Seek) -> Result { + stream.seek(SeekFrom::Current(6))?; + + Ok(Header {}) + } +} + +#[derive(Debug, Clone, PartialEq)] +struct LogicalScreenDescriptor { + color_table_flag: bool, + color_resolution: u8, +} + +impl LogicalScreenDescriptor { + fn from_stream(mut stream: impl Read + Seek) -> Result { + stream.seek(SeekFrom::Current(4))?; + + let packed = stream.read_u8()?; + let color_table_flag = (packed >> 7) & 1; + let color_resolution = (packed >> 4) & 0b111; + + stream.seek(SeekFrom::Current(2))?; + + Ok(LogicalScreenDescriptor { + color_table_flag: color_table_flag != 0, + color_resolution, + }) + } +} + +#[derive(Debug, Clone, PartialEq)] +struct GlobalColorTable {} + +impl GlobalColorTable { + fn from_stream( + mut stream: impl Read + Seek, + color_resolution: u8, + ) -> Result { + stream.seek(SeekFrom::Current( + 3 * (2_i64.pow(color_resolution as u32 + 1)), + ))?; + + Ok(GlobalColorTable {}) + } +} + +#[derive(Debug, PartialEq)] +enum ApplicationExtensionKind { + C2pa, + Xmp, + Unknown, +} + +#[derive(Debug, Clone, PartialEq)] +struct ApplicationExtension { + identifier: [u8; 8], + authentication_code: [u8; 3], + data_sub_blocks: DataSubBlocks, +} + +impl ApplicationExtension { + fn new_c2pa(bytes: &[u8]) -> ApplicationExtension { + ApplicationExtension { + identifier: *b"C2PA_GIF", + authentication_code: [0x01, 0x00, 0x00], + data_sub_blocks: DataSubBlocks::from_decoded_bytes(bytes), + } + } + + fn new_xmp(mut bytes: Vec) -> ApplicationExtension { + // Add XMP magic trailer. 
+ bytes.reserve(257); + bytes.push(1); + for byte in (0..=255).rev() { + bytes.push(byte); + } + + ApplicationExtension { + identifier: *b"XMP Data", + authentication_code: [0x58, 0x4d, 0x50], + data_sub_blocks: DataSubBlocks::from_decoded_bytes(&bytes), + } + } + + fn from_stream(mut stream: impl Read + Seek) -> Result { + let app_block_size = stream.read_u8()?; + // App block size is a fixed value. + if app_block_size != 0x0b { + return Err(CodecError::InvalidAsset { + src: None, + context: format!( + "Invalid block size for app block extension {}!=11", + app_block_size + ), + }); + } + + let mut app_id = [0u8; 8]; + stream.read_exact(&mut app_id)?; + + let mut app_auth_code = [0u8; 3]; + stream.read_exact(&mut app_auth_code)?; + + let mut app_block_ext = ApplicationExtension { + identifier: app_id, + authentication_code: app_auth_code, + data_sub_blocks: DataSubBlocks::empty(), + }; + + match app_block_ext.kind() { + ApplicationExtensionKind::C2pa | ApplicationExtensionKind::Xmp => { + app_block_ext.data_sub_blocks = DataSubBlocks::from_encoded_stream(stream)?; + } + // Ignore caching unknown app blocks as we don't need it. + ApplicationExtensionKind::Unknown => { + DataSubBlocks::from_encoded_stream_and_skip(stream)?; + } + }; + + Ok(app_block_ext) + } + + fn kind(&self) -> ApplicationExtensionKind { + match (&self.identifier, self.authentication_code) { + (b"C2PA_GIF", [0x01, 0x00, 0x00]) => ApplicationExtensionKind::C2pa, + (b"XMP Data", [0x58, 0x4d, 0x50]) => ApplicationExtensionKind::Xmp, + (_, _) => ApplicationExtensionKind::Unknown, + } + } + + fn to_bytes(&self) -> Vec { + let bytes = self.data_sub_blocks.to_encoded_bytes(); + // The header size + the amount of byte length markers + the amount of bytes stored + terminator. 
+ let mut header = Vec::with_capacity(14 + bytes.len().div_ceil(255) + bytes.len() + 1); + header.push(0x21); + header.push(0xff); + header.push(0x0b); + header.extend_from_slice(&self.identifier); + header.extend_from_slice(&self.authentication_code); + header.extend_from_slice(bytes); + header + } +} + +impl From for Block { + fn from(value: ApplicationExtension) -> Self { + Block::ApplicationExtension(value) + } +} + +#[derive(Debug, Clone, PartialEq)] +struct PlainTextExtension {} + +impl PlainTextExtension { + fn from_stream(mut stream: impl Read + Seek) -> Result { + stream.seek(SeekFrom::Current(11))?; + DataSubBlocks::from_encoded_stream_and_skip(&mut stream)?; + Ok(PlainTextExtension {}) + } +} + +#[derive(Debug, Clone, PartialEq)] +struct CommentExtension {} + +impl CommentExtension { + fn from_stream(stream: impl Read + Seek) -> Result { + // stream.seek(SeekFrom::Current(0))?; + DataSubBlocks::from_encoded_stream_and_skip(stream)?; + Ok(CommentExtension {}) + } +} + +#[derive(Debug, Clone, PartialEq)] +struct GraphicControlExtension {} + +impl GraphicControlExtension { + // TODO: validate ext introducer and label, and do that for other extensions? 
+ fn from_stream(mut stream: impl Read + Seek) -> Result { + stream.seek(SeekFrom::Current(6))?; + Ok(GraphicControlExtension {}) + } +} + +#[derive(Debug, Clone, PartialEq)] +struct ImageDescriptor { + local_color_table_flag: bool, + local_color_table_size: u8, +} + +impl ImageDescriptor { + fn from_stream(mut stream: impl Read + Seek) -> Result { + stream.seek(SeekFrom::Current(8))?; + + let packed = stream.read_u8()?; + let local_color_table_flag = (packed >> 7) & 1; + let local_color_table_size = packed & 0b111; + + Ok(ImageDescriptor { + local_color_table_flag: local_color_table_flag != 0, + local_color_table_size, + }) + } +} + +#[derive(Debug, Clone, PartialEq)] +struct LocalColorTable {} + +impl LocalColorTable { + fn from_stream( + mut stream: impl Read + Seek, + local_color_table_size: u8, + ) -> Result { + stream.seek(SeekFrom::Current( + 3 * (2_i64.pow(local_color_table_size as u32 + 1)), + ))?; + Ok(LocalColorTable {}) + } +} + +#[derive(Debug, Clone, PartialEq)] +struct ImageData {} + +impl ImageData { + fn from_stream(mut stream: impl Read + Seek) -> Result { + stream.seek(SeekFrom::Current(1))?; + DataSubBlocks::from_encoded_stream_and_skip(stream)?; + Ok(ImageData {}) + } +} + +#[derive(Debug, Clone, PartialEq)] +struct DataSubBlocks { + bytes: Vec, +} + +impl DataSubBlocks { + fn empty() -> DataSubBlocks { + // Terminator byte. + DataSubBlocks { bytes: vec![0] } + } + + // fn from_encoded_bytes(bytes: Vec) -> DataSubBlocks { + // DataSubBlocks { bytes } + // } + + fn from_decoded_bytes(bytes: &[u8]) -> DataSubBlocks { + // The amount of length marker bytes + amount of bytes + terminator byte. + let mut data_sub_blocks = Vec::with_capacity(bytes.len().div_ceil(255) + bytes.len() + 1); + for chunk in bytes.chunks(255) { + data_sub_blocks.push(chunk.len() as u8); + data_sub_blocks.extend_from_slice(chunk); + } + + // Add terminator. 
+ data_sub_blocks.push(0); + + DataSubBlocks { + bytes: data_sub_blocks, + } + } + + fn from_encoded_stream(mut stream: impl Read + Seek) -> Result { + let mut data_sub_blocks = Vec::new(); + loop { + let sub_block_size = stream.read_u8()?; + if sub_block_size == 0 { + break; + } + + data_sub_blocks.push(sub_block_size); + + let start = data_sub_blocks.len(); + let end = start + sub_block_size as usize; + data_sub_blocks.resize(end, 0); + + stream.read_exact(&mut data_sub_blocks[start..end])?; + } + + data_sub_blocks.push(0); + + Ok(DataSubBlocks { + bytes: data_sub_blocks, + }) + } + + fn from_encoded_stream_and_skip(mut stream: impl Read + Seek) -> Result { + let mut length = 0; + loop { + let sub_block_size = stream.read_u8()?; + length += 1; + + if sub_block_size == 0 { + break; + } else { + length += sub_block_size as u64; + stream.seek(SeekFrom::Current(sub_block_size as i64))?; + } + } + + Ok(length) + } + + fn to_encoded_bytes(&self) -> &[u8] { + &self.bytes + } + + fn to_decoded_bytes(&self) -> Vec { + // Amount of bytes - (length markers + terminator). + let mut bytes = Vec::with_capacity(self.bytes.len() - (self.bytes.len().div_ceil(255) + 1)); + for chunk in self.bytes.chunks(256) { + bytes.extend_from_slice(&chunk[1..]); + } + + // Remove terminator. 
+ bytes.truncate(bytes.len() - 1); + + bytes + } +} + +#[cfg(test)] +mod tests { + use io::{Cursor, Seek}; + + use super::*; + + const SAMPLE1: &[u8] = include_bytes!("../../../../tests/fixtures/sample1.gif"); + + #[test] + fn test_read_blocks() -> Result<(), CodecError> { + let mut src = Cursor::new(SAMPLE1); + + let blocks: Vec<_> = Blocks::new(&mut src)?.collect::>()?; + assert_eq!( + blocks.first(), + Some(&BlockMarker { + start: 0, + len: 6, + block: Block::Header(Header {}) + }) + ); + assert_eq!( + blocks.get(1), + Some(&BlockMarker { + start: 6, + len: 7, + block: Block::LogicalScreenDescriptor(LogicalScreenDescriptor { + color_table_flag: true, + color_resolution: 7 + }) + }) + ); + assert_eq!( + blocks.get(2), + Some(&BlockMarker { + start: 13, + len: 768, + block: Block::GlobalColorTable(GlobalColorTable {}) + }) + ); + assert_eq!( + blocks.get(3), + Some(&BlockMarker { + start: 781, + len: 19, + block: Block::ApplicationExtension(ApplicationExtension { + identifier: *b"NETSCAPE", + authentication_code: [50, 46, 48], + data_sub_blocks: DataSubBlocks::empty(), + }) + }) + ); + assert_eq!( + blocks.get(4), + Some(&BlockMarker { + start: 800, + len: 8, + block: Block::GraphicControlExtension(GraphicControlExtension {}) + }) + ); + assert_eq!( + blocks.get(5), + Some(&BlockMarker { + start: 808, + len: 52, + block: Block::CommentExtension(CommentExtension {}) + }) + ); + + Ok(()) + } + + #[test] + fn test_write_remove_block() -> Result<(), CodecError> { + let src = Cursor::new(SAMPLE1); + + let mut codec1 = GifCodec::new(src); + + assert!(matches!(codec1.read_c2pa(), Ok(None))); + + let mut dst1 = Cursor::new(Vec::with_capacity(SAMPLE1.len() + 15 + 7)); + let random_bytes = [1, 2, 3, 4, 3, 2, 1]; + codec1.write_c2pa(&mut dst1, &random_bytes)?; + + let mut codec2 = GifCodec::new(dst1); + let data_written = codec2.read_c2pa()?; + assert_eq!(data_written.as_deref(), Some(random_bytes.as_slice())); + + let mut dst2 = 
Cursor::new(Vec::with_capacity(SAMPLE1.len())); + codec2.remove_c2pa(&mut dst2)?; + + let mut codec3 = GifCodec::new(&mut dst2); + assert!(matches!(codec3.read_c2pa(), Ok(None))); + + let mut bytes = Vec::new(); + dst2.rewind()?; + dst2.read_to_end(&mut bytes)?; + assert_eq!(SAMPLE1, bytes); + + Ok(()) + } + + #[test] + fn test_write_insert_two_blocks() -> Result<(), CodecError> { + let src = Cursor::new(SAMPLE1); + + let mut codec = GifCodec::new(src); + + let mut dst1 = Cursor::new(Vec::with_capacity(SAMPLE1.len())); + let test_block = Block::ApplicationExtension(ApplicationExtension { + identifier: *b"12345678", + authentication_code: [0, 0, 0], + data_sub_blocks: DataSubBlocks::empty(), + }); + codec.insert_block(&mut dst1, &test_block)?; + + let mut codec = GifCodec::new(dst1); + let mut dst2 = Cursor::new(Vec::with_capacity(SAMPLE1.len())); + codec.insert_block(&mut dst2, &test_block)?; + + let blocks: Vec<_> = Blocks::new(&mut dst2)?.collect::>()?; + assert_eq!( + blocks.get(3), + Some(&BlockMarker { + start: 781, + len: 15, + block: test_block.clone() + }) + ); + assert_eq!( + blocks.get(4), + Some(&BlockMarker { + start: 796, + len: 15, + block: test_block + }) + ); + + Ok(()) + } + + #[test] + fn test_write_bytes() -> Result<(), CodecError> { + let src = Cursor::new(SAMPLE1); + + let mut codec1 = GifCodec::new(src); + + assert!(matches!(codec1.read_c2pa(), Ok(None))); + + let mut dst = Cursor::new(Vec::with_capacity(SAMPLE1.len() + 15 + 7)); + let random_bytes = [1, 2, 3, 4, 3, 2, 1]; + codec1.write_c2pa(&mut dst, &random_bytes)?; + + let mut codec2 = GifCodec::new(dst); + let data_written = codec2.read_c2pa()?; + assert_eq!(data_written.as_deref(), Some(random_bytes.as_slice())); + + Ok(()) + } + + #[test] + fn test_write_bytes_replace() -> Result<(), CodecError> { + let mut src = Cursor::new(SAMPLE1); + + let mut codec = GifCodec::new(&mut src); + + assert!(matches!(codec.read_c2pa(), Ok(None))); + + let mut dst1 = 
Cursor::new(Vec::with_capacity(SAMPLE1.len() + 15 + 7)); + let random_bytes = [1, 2, 3, 4, 3, 2, 1]; + codec.write_c2pa(&mut dst1, &random_bytes)?; + + let mut codec = GifCodec::new(dst1); + let data_written = codec.read_c2pa()?; + assert_eq!(data_written.as_deref(), Some(random_bytes.as_slice())); + + let mut dst2 = Cursor::new(Vec::with_capacity(SAMPLE1.len() + 15 + 5)); + let random_bytes = [3, 2, 1, 2, 3]; + codec.write_c2pa(&mut dst2, &random_bytes)?; + + let mut codec = GifCodec::new(dst2); + let data_written = codec.read_c2pa()?; + assert_eq!(data_written.as_deref(), Some(random_bytes.as_slice())); + + let mut bytes = Vec::new(); + src.rewind()?; + src.read_to_end(&mut bytes)?; + assert_eq!(SAMPLE1, bytes); + + Ok(()) + } + + #[test] + fn test_data_hash() -> Result<(), CodecError> { + let src = Cursor::new(SAMPLE1); + + let mut codec1 = GifCodec::new(src); + + assert_eq!( + codec1.c2pa_span()?, + C2paSpan { + spans: vec![ByteSpan { start: 781, len: 1 }] + } + ); + + let mut dst1 = Cursor::new(Vec::with_capacity(SAMPLE1.len() + 15 + 4)); + codec1.write_c2pa(&mut dst1, &[1, 2, 3, 4])?; + + let mut codec2 = GifCodec::new(dst1); + assert_eq!( + codec2.c2pa_span()?, + C2paSpan { + spans: vec![ByteSpan { + start: 781, + len: 20 + }] + } + ); + + Ok(()) + } + + #[test] + fn test_box_hash() -> Result<(), CodecError> { + let src = Cursor::new(SAMPLE1); + + let mut codec = GifCodec::new(src); + let box_hash = codec.box_span()?; + assert_eq!( + box_hash.spans.first(), + Some(&NamedByteSpan { + names: vec!["GIF89a".to_owned()], + span: ByteSpan { start: 0, len: 6 } + }) + ); + assert_eq!( + box_hash.spans.get(box_hash.spans.len() / 2), + Some(&NamedByteSpan { + names: vec!["2C".to_owned()], + span: ByteSpan { + start: 368495, + len: 778 + } + }) + ); + assert_eq!( + box_hash.spans.last(), + Some(&NamedByteSpan { + names: vec!["3B".to_owned()], + span: ByteSpan { + start: SAMPLE1.len() as u64, + len: 1 + } + }) + ); + assert_eq!(box_hash.spans.len(), 276); + + Ok(()) + } 
+ + // #[test] + // fn test_composed_manifest() -> Result<(), ParseError> { + // let encoder = GifEncoder {}; + + // let block = encoder.compose_manifest(&[1, 2, 3], "")?; + // assert_eq!( + // block, + // vec![33, 255, 11, 67, 50, 80, 65, 95, 71, 73, 70, 1, 0, 0, 3, 1, 2, 3, 0] + // ); + + // Ok(()) + // } +} diff --git a/sdk/crates/c2pa-codecs/src/codecs/jpeg.rs b/sdk/crates/c2pa-codecs/src/codecs/jpeg.rs new file mode 100644 index 000000000..f14d8be40 --- /dev/null +++ b/sdk/crates/c2pa-codecs/src/codecs/jpeg.rs @@ -0,0 +1,1193 @@ +// Copyright 2022 Adobe. All rights reserved. +// This file is licensed to you under the Apache License, +// Version 2.0 (http://www.apache.org/licenses/LICENSE-2.0) +// or the MIT license (http://opensource.org/licenses/MIT), +// at your option. + +// Unless required by applicable law or agreed to in writing, +// this software is distributed on an "AS IS" BASIS, WITHOUT +// WARRANTIES OR REPRESENTATIONS OF ANY KIND, either express or +// implied. See the LICENSE-MIT and LICENSE-APACHE files for the +// specific language governing permissions and limitations under +// each license. 
+ +use std::{ + collections::HashMap, + io::{BufReader, Cursor, Read, Seek, Write}, +}; + +use byteorder::{BigEndian, ReadBytesExt, WriteBytesExt}; +use img_parts::{ + jpeg::{ + markers::{self, APP0, APP15, COM, DQT, DRI, P, RST0, RST7, SOF0, SOF15, SOS, Z}, + Jpeg, JpegSegment, + }, + Bytes, DynImage, +}; + +use crate::{ + xmp::{add_provenance, MIN_XMP}, + BoxSpan, ByteSpan, C2paSpan, CodecError, Decode, DefaultSpan, Embed, Embeddable, Encode, + NamedByteSpan, Span, Support, +}; + +const XMP_SIGNATURE: &[u8] = b"http://ns.adobe.com/xap/1.0/"; +const XMP_SIGNATURE_BUFFER_SIZE: usize = XMP_SIGNATURE.len() + 1; // skip null or space char at end + +const MAX_JPEG_MARKER_SIZE: usize = 64000; // technically it's 64K but a bit smaller is fine + +const C2PA_MARKER: [u8; 4] = [0x63, 0x32, 0x70, 0x61]; + +fn vec_compare(va: &[u8], vb: &[u8]) -> bool { + (va.len() == vb.len()) && // zip stops at the shortest + va.iter() + .zip(vb) + .all(|(a,b)| a == b) +} + +// Return contents of APP1 segment if it is an XMP segment. +fn extract_xmp(seg: &JpegSegment) -> Option { + let contents = seg.contents(); + if contents.starts_with(XMP_SIGNATURE) { + let rest = contents.slice(XMP_SIGNATURE_BUFFER_SIZE..); + String::from_utf8(rest.to_vec()).ok() + } else { + None + } +} + +// Extract XMP from bytes. 
+fn xmp_from_bytes(asset_bytes: &[u8]) -> Option { + if let Ok(jpeg) = Jpeg::from_bytes(Bytes::copy_from_slice(asset_bytes)) { + let segs = jpeg.segments_by_marker(markers::APP1); + let xmp: Vec = segs.filter_map(extract_xmp).collect(); + match xmp.is_empty() { + true => None, + false => Some(xmp.concat()), + } + } else { + None + } +} + +fn add_required_segs_to_stream( + mut src: impl Read + Seek, + mut dst: impl Write, +) -> Result<(), CodecError> { + let mut buf: Vec = Vec::new(); + src.rewind()?; + src.read_to_end(&mut buf)?; + src.rewind()?; + + let dimg_opt = DynImage::from_bytes(buf.into()).map_err(|err| CodecError::InvalidAsset { + src: Some(err.to_string()), + context: "Could not parse input JPEG".to_owned(), + })?; + + if let Some(DynImage::Jpeg(jpeg)) = dimg_opt { + // check for JUMBF Seg + let cai_app11 = get_cai_segments(&jpeg)?; // make sure we only check for C2PA segments + + if cai_app11.is_empty() { + // create dummy JUMBF seg + let mut no_bytes: Vec = vec![0; 50]; // enough bytes to be valid + no_bytes.splice(16..20, C2PA_MARKER); // cai UUID signature + let mut aio = JpegCodec::new(src); + aio.write_c2pa(dst, &no_bytes)?; + } else { + // just clone + src.rewind()?; + std::io::copy(&mut src, &mut dst)?; + } + } else { + return Err(CodecError::IncorrectFormat); + } + + Ok(()) +} + +// all cai specific segments +fn get_cai_segments(jpeg: &img_parts::jpeg::Jpeg) -> Result, CodecError> { + let mut cai_segs: Vec = Vec::new(); + + let segments = jpeg.segments(); + + let mut cai_en: Vec = Vec::new(); + let mut cai_seg_cnt: u32 = 0; + + for (i, segment) in segments.iter().enumerate() { + let raw_bytes = segment.contents(); + let seg_type = segment.marker(); + + if raw_bytes.len() > 16 && seg_type == markers::APP11 { + // we need at least 16 bytes in each segment for CAI + let mut raw_vec = raw_bytes.to_vec(); + let _ci = raw_vec.as_mut_slice()[0..2].to_vec(); + let en = raw_vec.as_mut_slice()[2..4].to_vec(); + let mut z_vec = 
Cursor::new(raw_vec.as_mut_slice()[4..8].to_vec()); + let _z = z_vec.read_u32::()?; + + let is_cai_continuation = vec_compare(&cai_en, &en); + + if cai_seg_cnt > 0 && is_cai_continuation { + cai_seg_cnt += 1; + cai_segs.push(i); + } else { + // check if this is a CAI JUMBF block + let jumb_type = &raw_vec.as_mut_slice()[24..28]; + let is_cai = vec_compare(&C2PA_MARKER, jumb_type); + if is_cai { + cai_segs.push(i); + cai_seg_cnt = 1; + cai_en.clone_from(&en); // store the identifier + } + } + } + } + + Ok(cai_segs) +} + +// delete cai segments +fn delete_cai_segments(jpeg: &mut img_parts::jpeg::Jpeg) -> Result { + let cai_segs = get_cai_segments(jpeg)?; + if cai_segs.is_empty() { + return Ok(false); + } + + let jpeg_segs = jpeg.segments_mut(); + + // remove cai segments + for seg in cai_segs.iter().rev() { + jpeg_segs.remove(*seg); + } + Ok(true) +} + +#[derive(Debug)] +pub struct JpegCodec { + src: R, +} + +impl JpegCodec { + pub fn new(src: R) -> Self { + Self { src } + } +} + +impl Support for JpegCodec<()> { + const MAX_SIGNATURE_LEN: usize = 3; + + fn supports_signature(signature: &[u8]) -> bool { + signature[0..3] == [0xff, 0xd8, 0xff] + } + + fn supports_extension(ext: &str) -> bool { + matches!(ext, "jpg" | "jpeg") + } + + fn supports_mime(mime: &str) -> bool { + matches!(mime, "image/jpeg") + } +} + +impl Decode for JpegCodec { + fn read_c2pa(&mut self) -> Result>, CodecError> { + let mut buffer: Vec = Vec::new(); + + let mut manifest_store_cnt = 0; + + // load the bytes + let mut buf: Vec = Vec::new(); + + self.src.rewind()?; + self.src.read_to_end(&mut buf)?; + + let dimg_opt = + DynImage::from_bytes(buf.into()).map_err(|err| CodecError::InvalidAsset { + src: Some(err.to_string()), + context: "Could not parse input JPEG".to_string(), + })?; + + if let Some(dimg) = dimg_opt { + match dimg { + DynImage::Jpeg(jpeg) => { + let app11 = jpeg.segments_by_marker(markers::APP11); + let mut cai_en: Vec = Vec::new(); + let mut cai_seg_cnt: u32 = 0; + for segment in 
app11 { + let raw_bytes = segment.contents(); + if raw_bytes.len() > 16 { + // we need at least 16 bytes in each segment for CAI + let mut raw_vec = raw_bytes.to_vec(); + let _ci = raw_vec.as_mut_slice()[0..2].to_vec(); + let en = raw_vec.as_mut_slice()[2..4].to_vec(); + let mut z_vec = Cursor::new(raw_vec.as_mut_slice()[4..8].to_vec()); + let z = z_vec.read_u32::()?; + + let is_cai_continuation = vec_compare(&cai_en, &en); + + if cai_seg_cnt > 0 && is_cai_continuation { + // make sure this is a cai segment for additional segments, + if z <= cai_seg_cnt { + // this a non contiguous segment with same "en" so a bad set of data + // reset and continue to search + cai_en = Vec::new(); + continue; + } + // take out LBox & TBox + buffer.append(&mut raw_vec.as_mut_slice()[16..].to_vec()); + + cai_seg_cnt += 1; + } else if raw_vec.len() > 28 { + // must be at least 28 bytes for this to be a valid JUMBF box + // check if this is a CAI JUMBF block + let jumb_type = &raw_vec.as_mut_slice()[24..28]; + let is_cai = vec_compare(&C2PA_MARKER, jumb_type); + + if is_cai { + if manifest_store_cnt == 1 { + return Err(CodecError::MoreThanOneC2pa); + } + + buffer.append(&mut raw_vec.as_mut_slice()[8..].to_vec()); + cai_seg_cnt = 1; + cai_en.clone_from(&en); // store the identifier + + manifest_store_cnt += 1; + } + } + } + } + } + _ => { + return Err(CodecError::InvalidAsset { + src: None, + context: "Unknown image format".to_string(), + }) + } + }; + } else { + return Err(CodecError::IncorrectFormat); + } + + if buffer.is_empty() { + return Ok(None); + } + + Ok(Some(buffer)) + } + + // Get XMP block + fn read_xmp(&mut self) -> Result, CodecError> { + // load the bytes + let mut buf: Vec = Vec::new(); + self.src.rewind()?; + self.src.read_to_end(&mut buf)?; + Ok(xmp_from_bytes(&buf)) + } +} + +impl Encode for JpegCodec { + fn write_c2pa(&mut self, dst: impl Write, c2pa: &[u8]) -> Result<(), CodecError> { + let mut buf = Vec::new(); + // read the whole asset + self.src.rewind()?; + 
self.src.read_to_end(&mut buf)?; + let mut jpeg = Jpeg::from_bytes(buf.into()).map_err(|err| CodecError::InvalidAsset { + src: Some(err.to_string()), + context: "TODO".to_string(), + })?; + + // remove existing CAI segments + delete_cai_segments(&mut jpeg)?; + + let jumbf_len = c2pa.len(); + let num_segments = (jumbf_len / MAX_JPEG_MARKER_SIZE) + 1; + let mut seg_chucks = c2pa.chunks(MAX_JPEG_MARKER_SIZE); + + for seg in 1..num_segments + 1 { + /* + If the size of the box payload is less than 2^32-8 bytes, + then all fields except the XLBox field, that is: Le, CI, En, Z, LBox and TBox, + shall be present in all JPEG XT marker segment representing this box, + regardless of whether the marker segments starts this box, + or continues a box started by a former JPEG XT Marker segment. + */ + // we need to prefix the JUMBF with the JPEG XT markers (ISO 19566-5) + // CI: JPEG extensions marker - JP + // En: Box Instance Number - 0x0001 + // (NOTE: can be any unique ID, so we pick one that shouldn't conflict) + // Z: Packet sequence number - 0x00000001... + let ci = vec![0x4a, 0x50]; + let en = vec![0x02, 0x11]; + let z: u32 = u32::try_from(seg).map_err(|err| CodecError::InvalidAsset { + src: Some(err.to_string()), + context: "Too many JUMBF segments".to_string(), + })?; //seg.to_be_bytes(); + + let mut seg_data = Vec::new(); + seg_data.extend(ci); + seg_data.extend(en); + seg_data.extend(z.to_be_bytes()); + if seg > 1 { + // the LBox and TBox are already in the JUMBF + // but we need to duplicate them in all other segments + let lbox_tbox = &c2pa[..8]; + seg_data.extend(lbox_tbox); + } + if seg_chucks.len() > 0 { + // make sure we have some... + if let Some(next_seg) = seg_chucks.next() { + seg_data.extend(next_seg); + } + } else { + seg_data.extend(c2pa); + } + + let seg_bytes = Bytes::from(seg_data); + let app11_segment = JpegSegment::new_with_contents(markers::APP11, seg_bytes); + jpeg.segments_mut().insert(seg, app11_segment); // we put this in the beginning... 
+ } + + jpeg.encoder().write_to(dst)?; + + Ok(()) + } + + fn remove_c2pa(&mut self, dst: impl Write) -> Result { + let mut buf = Vec::new(); + // read the whole asset + self.src.rewind()?; + self.src.read_to_end(&mut buf)?; + let mut jpeg = Jpeg::from_bytes(buf.into()).map_err(|err| CodecError::InvalidAsset { + src: Some(err.to_string()), + context: "TODO".to_string(), + })?; + + // remove existing CAI segments + let found = delete_cai_segments(&mut jpeg)?; + + jpeg.encoder().write_to(dst)?; + + Ok(found) + } + + fn write_xmp(&mut self, dst: impl Write, xmp: &str) -> Result<(), CodecError> { + let mut buf = Vec::new(); + self.src.rewind()?; + self.src.read_to_end(&mut buf)?; + let mut jpeg = Jpeg::from_bytes(buf.into()).map_err(|err| CodecError::InvalidAsset { + src: Some(err.to_string()), + context: "TODO".to_string(), + })?; + + let segments = jpeg.segments_mut(); + let mut xmp_index = None; + for (i, seg) in segments.iter().enumerate() { + if seg.marker() == markers::APP1 && seg.contents().starts_with(XMP_SIGNATURE) { + xmp_index = Some(i); + break; + } + } + + let xmp = format!("http://ns.adobe.com/xap/1.0/\0{}", xmp); + let segment = JpegSegment::new_with_contents(markers::APP1, Bytes::from(xmp.to_string())); + match xmp_index { + Some(i) => segments[i] = segment, + None => segments.insert(1, segment), + } + + jpeg.encoder().write_to(dst)?; + + Ok(()) + } + + fn write_xmp_provenance( + &mut self, + dst: impl Write, + provenance: &str, + ) -> Result<(), CodecError> { + let mut buf = Vec::new(); + // read the whole asset + self.src.rewind()?; + self.src.read_to_end(&mut buf)?; + let mut jpeg = Jpeg::from_bytes(buf.into()).map_err(|err| CodecError::InvalidAsset { + src: Some(err.to_string()), + context: "TODO".to_string(), + })?; + + // find any existing XMP segment and remember where it was + let mut xmp = MIN_XMP.to_string(); // default minimal XMP + let mut xmp_index = None; + let segments = jpeg.segments_mut(); + for (i, seg) in segments.iter().enumerate() { 
+ if seg.marker() == markers::APP1 && seg.contents().starts_with(XMP_SIGNATURE) { + xmp = extract_xmp(seg).unwrap_or_else(|| xmp.clone()); + xmp_index = Some(i); + break; + } + } + // add provenance and JPEG XMP prefix + let xmp = format!( + "http://ns.adobe.com/xap/1.0/\0{}", + add_provenance(&xmp, provenance)? + ); + let segment = JpegSegment::new_with_contents(markers::APP1, Bytes::from(xmp)); + // insert or add the segment + match xmp_index { + Some(i) => segments[i] = segment, + None => segments.insert(1, segment), + } + + jpeg.encoder().write_to(dst)?; + + Ok(()) + } +} + +impl Span for JpegCodec { + fn span(&mut self) -> Result { + Ok(DefaultSpan::Data(self.c2pa_span()?)) + } + + fn c2pa_span(&mut self) -> Result { + let mut cai_en: Vec = Vec::new(); + let mut cai_seg_cnt: u32 = 0; + + let mut positions: Vec = Vec::new(); + let mut curr_offset = 2; // start after JPEG marker + + let output_vec: Vec = Vec::new(); + let mut output_stream = Cursor::new(output_vec); + // make sure the file has the required segments so we can generate all the required offsets + add_required_segs_to_stream(&mut self.src, &mut output_stream)?; + + let buf: Vec = output_stream.into_inner(); + + let dimg = DynImage::from_bytes(buf.into()) + .map_err(|err| CodecError::InvalidAsset { + src: Some(err.to_string()), + context: "TODO".to_string(), + })? 
+ .ok_or(CodecError::IncorrectFormat)?; + + match dimg { + DynImage::Jpeg(jpeg) => { + for seg in jpeg.segments() { + match seg.marker() { + markers::APP11 => { + // JUMBF marker + let raw_bytes = seg.contents(); + + if raw_bytes.len() > 16 { + // we need at least 16 bytes in each segment for CAI + let mut raw_vec = raw_bytes.to_vec(); + let _ci = raw_vec.as_mut_slice()[0..2].to_vec(); + let en = raw_vec.as_mut_slice()[2..4].to_vec(); + + let is_cai_continuation = vec_compare(&cai_en, &en); + + if cai_seg_cnt > 0 && is_cai_continuation { + cai_seg_cnt += 1; + + let v = ByteSpan { + start: curr_offset, + len: seg.len_with_entropy() as u64, + }; + positions.push(v); + } else { + // check if this is a CAI JUMBF block + let jumb_type = raw_vec.as_mut_slice()[24..28].to_vec(); + let is_cai = vec_compare(&C2PA_MARKER, &jumb_type); + if is_cai { + cai_seg_cnt = 1; + cai_en.clone_from(&en); // store the identifier + + let v = ByteSpan { + start: curr_offset, + len: seg.len_with_entropy() as u64, + }; + + positions.push(v); + // } else { + // // save other for completeness sake + // let v = HashObjectPositions { + // offset: curr_offset, + // length: seg.len_with_entropy(), + // htype: HashBlockObjectType::Other, + // }; + // positions.push(v); + } + } + } + } + markers::APP1 => { + // // XMP marker or EXIF or Extra XMP + // let v = HashObjectPositions { + // offset: curr_offset, + // length: seg.len_with_entropy(), + // htype: HashBlockObjectType::Xmp, + // }; + // todo: pick the app1 that is the xmp (not crucial as it gets hashed either way) + // positions.push(v); + } + _ => { + // // save other for completeness sake + // let v = HashObjectPositions { + // offset: curr_offset, + // length: seg.len_with_entropy(), + // htype: HashBlockObjectType::Other, + // }; + + // positions.push(v); + } + } + curr_offset += seg.len_with_entropy() as u64; + } + } + _ => return Err(CodecError::IncorrectFormat), + } + + Ok(C2paSpan { spans: positions }) + } + + fn box_span(&mut self) -> 
Result { + let mut box_maps = make_box_maps(&mut self.src)?; + + for bm in box_maps.iter_mut() { + if bm.names[0] == "C2PA" { + continue; + } + + self.src.seek(std::io::SeekFrom::Start(bm.span.start))?; + + let size = if bm.names[0] == "SOS" { + let mut size = get_seg_size(&mut self.src)?; + + self.src + .seek(std::io::SeekFrom::Start(bm.span.start + size as u64))?; + + size += get_entropy_size(&mut self.src)?; + + size + } else { + get_seg_size(&mut self.src)? + }; + + bm.span.start = size as u64; + } + + Ok(BoxSpan { spans: box_maps }) + } +} + +fn in_entropy(marker: u8) -> bool { + matches!(marker, RST0..=RST7 | Z) +} + +// img-parts does not correctly return the true size of the SOS segment. This utility +// finds the correct break point for single image JPEGs. We will need a new JPEG decoder +// to handle those. Also this function can be removed if img-parts ever addresses this issue +// and support MPF JPEGs. +fn get_entropy_size(src: impl Read + Seek) -> Result { + // Search the entropy data looking for non entropy segment marker. The first valid seg marker before we hit + // end of the file. + + let mut buf_reader = BufReader::new(src); + + let mut size = 0; + + loop { + let curr_byte = buf_reader.read_u8()?; + if curr_byte == P { + let next_byte = buf_reader.read_u8()?; + + if !in_entropy(next_byte) { + break; + } else { + size += 1; + } + } + size += 1; + } + + Ok(size) +} + +fn has_length(marker: u8) -> bool { + matches!(marker, RST0..=RST7 | APP0..=APP15 | SOF0..=SOF15 | SOS | COM | DQT | DRI) +} + +fn get_seg_size(mut src: impl Read + Seek) -> Result { + let p = src.read_u8()?; + let marker = if p == P { + src.read_u8()? + } else { + return Err(CodecError::InvalidAsset { + src: None, + context: "Cannot read segment marker".to_string(), + }); + }; + + if has_length(marker) { + let val: usize = src.read_u16::()? 
as usize; + Ok(val + 2) + } else { + Ok(2) + } +} + +fn make_box_maps(mut src: impl Read + Seek) -> Result, CodecError> { + let segment_names = HashMap::from([ + (0xe0u8, "APP0"), + (0xe1u8, "APP1"), + (0xe2u8, "APP2"), + (0xe3u8, "APP3"), + (0xe4u8, "APP4"), + (0xe5u8, "APP5"), + (0xe6u8, "APP6"), + (0xe7u8, "APP7"), + (0xe8u8, "APP8"), + (0xe9u8, "APP9"), + (0xeau8, "APP10"), + (0xebu8, "APP11"), + (0xecu8, "APP12"), + (0xedu8, "APP13"), + (0xeeu8, "APP14"), + (0xefu8, "APP15"), + (0xfeu8, "COM"), + (0xc4u8, "DHT"), + (0xdbu8, "DQT"), + (0xddu8, "DRI"), + (0xd9u8, "EOI"), + (0xd0u8, "RST0"), + (0xd1u8, "RST1"), + (0xd2u8, "RST2"), + (0xd3u8, "RST3"), + (0xd4u8, "RST4"), + (0xd5u8, "RST5"), + (0xd6u8, "RST6"), + (0xd7u8, "RST7"), + (0xc0u8, "SOF0"), + (0xc1u8, "SOF1"), + (0xc2u8, "SOF2"), + (0xd8u8, "SOI"), + (0xdau8, "SOS"), + (0xf0u8, "JPG0"), + (0xf1u8, "JPG1"), + (0xf2u8, "JPG2"), + (0xf3u8, "JPG3"), + (0xf4u8, "JPG4"), + (0xf5u8, "JPG5"), + (0xf6u8, "JPG6"), + (0xf7u8, "JPG7"), + (0xf8u8, "JPG8"), + (0xf9u8, "JPG9"), + (0xfau8, "JPG10"), + (0xfbu8, "JPG11"), + (0xfcu8, "JPG12"), + (0xfdu8, "JPG13"), + ]); + + let mut box_maps = Vec::new(); + let mut cai_en: Vec = Vec::new(); + let mut cai_seg_cnt: u32 = 0; + let mut cai_index = 0; + + src.rewind()?; + + let buf_reader = BufReader::new(src); + let mut reader = jfifdump::Reader::new(buf_reader).map_err(|err| CodecError::InvalidAsset { + src: Some(err.to_string()), + context: "could not read JPEG segments".to_string(), + })?; + + while let Ok(seg) = reader.next_segment() { + match seg.kind { + jfifdump::SegmentKind::Eoi => { + let bm = NamedByteSpan { + names: vec!["EOI".to_string()], + span: ByteSpan { + start: seg.position as u64, + len: 0, + }, + }; + + box_maps.push(bm); + } + jfifdump::SegmentKind::Soi => { + let bm = NamedByteSpan { + names: vec!["SOI".to_string()], + span: ByteSpan { + start: seg.position as u64, + len: 0, + }, + }; + + box_maps.push(bm); + } + jfifdump::SegmentKind::App { nr, data } if 
nr == 0x0b => { + let nr = nr | 0xe0; + + // JUMBF marker + let raw_bytes = data; + + if raw_bytes.len() > 16 { + // we need at least 16 bytes in each segment for CAI + let mut raw_vec = raw_bytes.to_vec(); + let _ci = raw_vec.as_mut_slice()[0..2].to_vec(); + let en = raw_vec.as_mut_slice()[2..4].to_vec(); + + let is_cai_continuation = vec_compare(&cai_en, &en); + + if cai_seg_cnt > 0 && is_cai_continuation { + cai_seg_cnt += 1; + + let cai_bm = &mut box_maps[cai_index]; + cai_bm.span.len += raw_bytes.len() as u64 + 4; + } else { + // check if this is a CAI JUMBF block + let jumb_type = raw_vec.as_mut_slice()[24..28].to_vec(); + let is_cai = vec_compare(&C2PA_MARKER, &jumb_type); + if is_cai { + cai_seg_cnt = 1; + cai_en.clone_from(&en); // store the identifier + + let c2pa_bm = NamedByteSpan { + names: vec!["C2PA".to_string()], + span: ByteSpan { + start: seg.position as u64, + len: raw_bytes.len() as u64 + 4, + }, + }; + + box_maps.push(c2pa_bm); + cai_index = box_maps.len() - 1; + } else { + let name = segment_names.get(&nr).ok_or(CodecError::InvalidAsset { + src: None, + context: "Unknown segment marker".to_owned(), + })?; + + let bm = NamedByteSpan { + names: vec![name.to_string()], + span: ByteSpan { + start: seg.position as u64, + len: 0, + }, + }; + + box_maps.push(bm); + } + } + } + } + jfifdump::SegmentKind::App { nr, data } => { + let nr = nr | 0xe0; + let _data = data; + + let name = segment_names.get(&nr).ok_or(CodecError::InvalidAsset { + src: None, + context: "Unknown segment marker".to_owned(), + })?; + + let bm = NamedByteSpan { + names: vec![name.to_string()], + span: ByteSpan { + start: seg.position as u64, + len: 0, + }, + }; + + box_maps.push(bm); + } + jfifdump::SegmentKind::App0Jfif(_) => { + let bm = NamedByteSpan { + names: vec!["APP0".to_string()], + span: ByteSpan { + start: seg.position as u64, + len: 0, + }, + }; + + box_maps.push(bm); + } + jfifdump::SegmentKind::Dqt(_) => { + let bm = NamedByteSpan { + names: vec!["DQT".to_string()], 
+ span: ByteSpan { + start: seg.position as u64, + len: 0, + }, + }; + + box_maps.push(bm); + } + jfifdump::SegmentKind::Dht(_) => { + let bm = NamedByteSpan { + names: vec!["DHT".to_string()], + span: ByteSpan { + start: seg.position as u64, + len: 0, + }, + }; + + box_maps.push(bm); + } + jfifdump::SegmentKind::Dac(_) => { + let bm = NamedByteSpan { + names: vec!["DAC".to_string()], + span: ByteSpan { + start: seg.position as u64, + len: 0, + }, + }; + + box_maps.push(bm); + } + jfifdump::SegmentKind::Frame(f) => { + let name = segment_names.get(&f.sof).ok_or(CodecError::InvalidAsset { + src: None, + context: "Unknown segment marker".to_owned(), + })?; + + let bm = NamedByteSpan { + names: vec![name.to_string()], + span: ByteSpan { + start: seg.position as u64, + len: 0, + }, + }; + + box_maps.push(bm); + } + jfifdump::SegmentKind::Scan(_s) => { + let bm = NamedByteSpan { + names: vec!["SOS".to_string()], + span: ByteSpan { + start: seg.position as u64, + len: 0, + }, + }; + + box_maps.push(bm); + } + jfifdump::SegmentKind::Dri(_) => { + let bm = NamedByteSpan { + names: vec!["DRI".to_string()], + span: ByteSpan { + start: seg.position as u64, + len: 0, + }, + }; + + box_maps.push(bm); + } + jfifdump::SegmentKind::Rst(_r) => (), + jfifdump::SegmentKind::Comment(_) => { + let bm = NamedByteSpan { + names: vec!["COM".to_string()], + span: ByteSpan { + start: seg.position as u64, + len: 0, + }, + }; + + box_maps.push(bm); + } + jfifdump::SegmentKind::Unknown { marker, data: _ } => { + let name = segment_names.get(&marker).ok_or(CodecError::InvalidAsset { + src: None, + context: "Unknown segment marker".to_owned(), + })?; + + let bm = NamedByteSpan { + names: vec![name.to_string()], + span: ByteSpan { + start: seg.position as u64, + len: 0, + }, + }; + + box_maps.push(bm); + } + } + } + + Ok(box_maps) +} + +impl Embed for JpegCodec { + fn embeddable(bytes: &[u8]) -> Result { + let jumbf_len = bytes.len(); + let num_segments = (jumbf_len / MAX_JPEG_MARKER_SIZE) + 1; + 
let mut seg_chucks = bytes.chunks(MAX_JPEG_MARKER_SIZE); + + let mut segments = Vec::new(); + + for seg in 1..num_segments + 1 { + /* + If the size of the box payload is less than 2^32-8 bytes, + then all fields except the XLBox field, that is: Le, CI, En, Z, LBox and TBox, + shall be present in all JPEG XT marker segment representing this box, + regardless of whether the marker segments starts this box, + or continues a box started by a former JPEG XT Marker segment. + */ + // we need to prefix the JUMBF with the JPEG XT markers (ISO 19566-5) + // CI: JPEG extensions marker - JP + // En: Box Instance Number - 0x0001 + // (NOTE: can be any unique ID, so we pick one that shouldn't conflict) + // Z: Packet sequence number - 0x00000001... + let ci = vec![0x4a, 0x50]; + let en = vec![0x02, 0x11]; + let z: u32 = u32::try_from(seg).map_err(|err| CodecError::InvalidAsset { + src: Some(err.to_string()), + context: "Too many JUMBF segments".to_string(), + })?; //seg.to_be_bytes(); + + let mut seg_data = Vec::new(); + seg_data.extend(ci); + seg_data.extend(en); + seg_data.extend(z.to_be_bytes()); + if seg > 1 { + // the LBox and TBox are already in the JUMBF + // but we need to duplicate them in all other segments + let lbox_tbox = &bytes[..8]; + seg_data.extend(lbox_tbox); + } + if seg_chucks.len() > 0 { + // make sure we have some... 
+ if let Some(next_seg) = seg_chucks.next() { + seg_data.extend(next_seg); + } + } else { + seg_data.extend(bytes); + } + + let seg_bytes = Bytes::from(seg_data); + let app11_segment = JpegSegment::new_with_contents(markers::APP11, seg_bytes); + segments.push(app11_segment); + } + + let output = Vec::with_capacity(bytes.len() * 2); + let mut out_stream = Cursor::new(output); + + // right out segments + for s in segments { + // maker + out_stream.write_u8(markers::P)?; + out_stream.write_u8(s.marker())?; + + //len + out_stream.write_u16::(s.contents().len() as u16 + 2)?; + + // data + out_stream.write_all(s.contents())?; + } + + Ok(Embeddable { + bytes: out_stream.into_inner(), + }) + } + + fn embed(&mut self, embeddable: Embeddable, dst: impl Write) -> Result<(), CodecError> { + todo!() + } +} + +#[cfg(test)] +pub mod tests { + // #![allow(clippy::unwrap_used)] + + // use std::io::{Read, Seek}; + + // #[cfg(target_arch = "wasm32")] + // use wasm_bindgen_test::*; + + // use super::*; + // #[test] + // fn test_extract_xmp() { + // let contents = Bytes::from_static(b"http://ns.adobe.com/xap/1.0/\0stuff"); + // let seg = JpegSegment::new_with_contents(markers::APP1, contents); + // let result = extract_xmp(&seg); + // assert_eq!(result, Some("stuff".to_owned())); + + // let contents = Bytes::from_static(b"http://ns.adobe.com/xap/1.0/ stuff"); + // let seg = JpegSegment::new_with_contents(markers::APP1, contents); + // let result = extract_xmp(&seg); + // assert_eq!(result, Some("stuff".to_owned())); + + // let contents = Bytes::from_static(b"tiny"); + // let seg = JpegSegment::new_with_contents(markers::APP1, contents); + // let result = extract_xmp(&seg); + // assert_eq!(result, None); + // } + + // #[test] + // fn test_remove_c2pa() { + // let source = crate::utils::test::fixture_path("CA.jpg"); + + // let temp_dir = tempfile::tempdir().unwrap(); + // let output = crate::utils::test::temp_dir_path(&temp_dir, "CA_test.jpg"); + + // std::fs::copy(source, 
&output).unwrap(); + // let jpeg_io = JpegCodec {}; + + // jpeg_io.remove_cai_store(&output).unwrap(); + + // // read back in asset, JumbfNotFound is expected since it was removed + // match jpeg_io.read_cai_store(&output) { + // Err(Error::JumbfNotFound) => (), + // _ => unreachable!(), + // } + // } + + // #[test] + // fn test_remove_c2pa_from_stream() { + // let source = crate::utils::test::fixture_path("CA.jpg"); + + // let source_bytes = std::fs::read(source).unwrap(); + // let mut source_stream = Cursor::new(source_bytes); + + // let jpeg_io = JpegCodec {}; + // let jpg_writer = jpeg_io.get_writer("jpg").unwrap(); + + // let output_bytes = Vec::new(); + // let mut output_stream = Cursor::new(output_bytes); + + // jpg_writer + // .remove_cai_store_from_stream(&mut source_stream, &mut output_stream) + // .unwrap(); + + // // read back in asset, JumbfNotFound is expected since it was removed + // let jpg_reader = jpeg_io.get_reader(); + // match jpg_reader.read_cai(&mut output_stream) { + // Err(Error::JumbfNotFound) => (), + // _ => unreachable!(), + // } + // } + + // #[test] + // fn test_xmp_read_write() { + // let source = crate::utils::test::fixture_path("CA.jpg"); + + // let temp_dir = tempfile::tempdir().unwrap(); + // let output = crate::utils::test::temp_dir_path(&temp_dir, "CA_test.jpg"); + + // std::fs::copy(source, &output).unwrap(); + + // let test_msg = "this some test xmp data"; + // let handler = JpegCodec::new(""); + + // // write xmp + // let assetio_handler = handler.get_handler("jpg"); + + // let remote_ref_handler = assetio_handler.remote_ref_writer_ref().unwrap(); + + // remote_ref_handler + // .embed_reference(&output, RemoteRefEmbedType::Xmp(test_msg.to_string())) + // .unwrap(); + + // // read back in XMP + // let mut file_reader = std::fs::File::open(&output).unwrap(); + // let read_xmp = assetio_handler + // .get_reader() + // .read_xmp(&mut file_reader) + // .unwrap(); + + // assert!(read_xmp.contains(test_msg)); + // } + + // 
#[cfg_attr(not(target_arch = "wasm32"), actix::test)] + // #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] + // async fn test_xmp_read_write_stream() { + // let source_bytes = include_bytes!("../../tests/fixtures/CA.jpg"); + + // let test_msg = "this some test xmp data"; + // let handler = JpegCodec::new(""); + + // let assetio_handler = handler.get_handler("jpg"); + + // let remote_ref_handler = assetio_handler.remote_ref_writer_ref().unwrap(); + + // let mut source_stream = Cursor::new(source_bytes.to_vec()); + // let mut output_stream = Cursor::new(Vec::new()); + // remote_ref_handler + // .embed_reference_to_stream( + // &mut source_stream, + // &mut output_stream, + // RemoteRefEmbedType::Xmp(test_msg.to_string()), + // ) + // .unwrap(); + + // output_stream.set_position(0); + + // // read back in XMP + // let read_xmp = assetio_handler + // .get_reader() + // .read_xmp(&mut output_stream) + // .unwrap(); + + // output_stream.set_position(0); + + // //std::fs::write("../target/xmp_write.jpg", output_stream.into_inner()).unwrap(); + + // assert!(read_xmp.contains(test_msg)); + // } + + // #[test] + // fn test_embeddable_manifest() { + // let jpeg_io = JpegCodec {}; + + // let source = crate::utils::test::fixture_path("CA.jpg"); + + // let ol = jpeg_io.get_object_locations(&source).unwrap(); + + // let cai_loc = ol + // .iter() + // .find(|o| o.htype == HashBlockObjectType::Cai) + // .unwrap(); + // let curr_manifest = jpeg_io.read_cai_store(&source).unwrap(); + + // let temp_dir = tempfile::tempdir().unwrap(); + // let output = crate::utils::test::temp_dir_path(&temp_dir, "CA_test.jpg"); + + // std::fs::copy(source, &output).unwrap(); + + // // remove existing + // jpeg_io.remove_cai_store(&output).unwrap(); + + // // generate new manifest data + // let em = jpeg_io + // .composed_data_ref() + // .unwrap() + // .compose_manifest(&curr_manifest, "jpeg") + // .unwrap(); + + // // insert new manifest + // let outbuf = Vec::new(); + // let mut out_stream = 
Cursor::new(outbuf); + + // let mut before = vec![0u8; cai_loc.offset]; + // let mut in_file = std::fs::File::open(&output).unwrap(); + + // // write before + // in_file.read_exact(before.as_mut_slice()).unwrap(); + // out_stream.write_all(&before).unwrap(); + + // // write composed bytes + // out_stream.write_all(&em).unwrap(); + + // // write bytes after + // let mut after_buf = Vec::new(); + // in_file.read_to_end(&mut after_buf).unwrap(); + // out_stream.write_all(&after_buf).unwrap(); + + // // read manifest back in from new in-memory JPEG + // out_stream.rewind().unwrap(); + // let restored_manifest = jpeg_io.read_cai(&mut out_stream).unwrap(); + + // assert_eq!(&curr_manifest, &restored_manifest); + // } +} diff --git a/sdk/crates/c2pa-codecs/src/codecs/mod.rs b/sdk/crates/c2pa-codecs/src/codecs/mod.rs new file mode 100644 index 000000000..47af8b4d3 --- /dev/null +++ b/sdk/crates/c2pa-codecs/src/codecs/mod.rs @@ -0,0 +1,32 @@ +// Copyright 2022 Adobe. All rights reserved. +// This file is licensed to you under the Apache License, +// Version 2.0 (http://www.apache.org/licenses/LICENSE-2.0) +// or the MIT license (http://opensource.org/licenses/MIT), +// at your option. + +// Unless required by applicable law or agreed to in writing, +// this software is distributed on an "AS IS" BASIS, WITHOUT +// WARRANTIES OR REPRESENTATIONS OF ANY KIND, either express or +// implied. See the LICENSE-MIT and LICENSE-APACHE files for the +// specific language governing permissions and limitations under +// each license. 
+ +// #[cfg(feature = "bmff")] +// pub mod bmff_io; +pub mod c2pa; +#[cfg(feature = "gif")] +pub mod gif; +#[cfg(feature = "jpeg")] +pub mod jpeg; +// #[cfg(feature = "mp3")] +// pub mod mp3_io; +// #[cfg(feature = "pdf")] +// pub mod pdf_io; +// #[cfg(feature = "png")] +// pub mod png_io; +// #[cfg(feature = "riff")] +// pub mod riff_io; +#[cfg(feature = "svg")] +pub mod svg; +// #[cfg(feature = "tiff")] +// pub mod tiff_io; diff --git a/sdk/crates/c2pa-codecs/src/codecs/mp3_io.rs b/sdk/crates/c2pa-codecs/src/codecs/mp3_io.rs new file mode 100644 index 000000000..80b88e551 --- /dev/null +++ b/sdk/crates/c2pa-codecs/src/codecs/mp3_io.rs @@ -0,0 +1,596 @@ +// Copyright 2023 Adobe. All rights reserved. +// This file is licensed to you under the Apache License, +// Version 2.0 (http://www.apache.org/licenses/LICENSE-2.0) +// or the MIT license (http://opensource.org/licenses/MIT), +// at your option. + +// Unless required by applicable law or agreed to in writing, +// this software is distributed on an "AS IS" BASIS, WITHOUT +// WARRANTIES OR REPRESENTATIONS OF ANY KIND, either express or +// implied. See the LICENSE-MIT and LICENSE-APACHE files for the +// specific language governing permissions and limitations under +// each license. 
/// Decodes the 4-byte size field of an ID3v2 header, where each byte
/// contributes its value shifted down so the bytes pack into 7-bit groups.
///
/// The bytes are not masked to 7 bits first, so a set high bit in one byte
/// is OR-ed into the group above it.
fn decode_tag_size(n: u32) -> u32 {
    let group0 = n & 0x0000_00ff;
    let group1 = (n & 0x0000_ff00) >> 1;
    let group2 = (n & 0x00ff_0000) >> 2;
    let group3 = (n & 0xff00_0000) >> 3;

    group0 | group1 | group2 | group3
}
+ input_stream.rewind().ok()?; + let header = ID3V2Header::read_header(input_stream).ok()?; + input_stream.rewind().ok()?; + + let reader = CAIReadWrapper { + reader: input_stream, + }; + + if let Ok(tag) = Tag::read_from(reader) { + let mut manifests = Vec::new(); + + for eo in tag.encapsulated_objects() { + if eo.mime_type == GEOB_FRAME_MIME_TYPE { + manifests.push(eo.data.clone()); + } + } + + if manifests.len() == 1 { + input_stream.rewind().ok()?; + + let mut tag_bytes = vec![0u8; header.get_size() as usize]; + input_stream.read_exact(tag_bytes.as_mut_slice()).ok()?; + + let pos = memmem::find(&tag_bytes, &manifests[0])?; + + return Some((pos as u64, manifests[0].len() as u32)); + } + } + None +} + +pub struct Mp3IO { + _mp3_format: String, +} + +impl CAIReader for Mp3IO { + fn read_cai(&self, input_stream: &mut dyn CAIRead) -> Result> { + input_stream.rewind()?; + + let mut manifest: Option> = None; + + if let Ok(tag) = Tag::read_from(input_stream) { + for eo in tag.encapsulated_objects() { + if eo.mime_type == GEOB_FRAME_MIME_TYPE { + match manifest { + Some(_) => { + return Err(Error::TooManyManifestStores); + } + None => manifest = Some(eo.data.clone()), + } + } + } + } + + manifest.ok_or(Error::JumbfNotFound) + } + + fn read_xmp(&self, input_stream: &mut dyn CAIRead) -> Option { + input_stream.rewind().ok()?; + + if let Ok(tag) = Tag::read_from(input_stream) { + for frame in tag.frames() { + if let Content::Private(private) = frame.content() { + if &private.owner_identifier == "XMP" { + return String::from_utf8(private.private_data.clone()).ok(); + } + } + } + } + + None + } +} + +impl RemoteRefEmbed for Mp3IO { + fn embed_reference(&self, asset_path: &Path, embed_ref: RemoteRefEmbedType) -> Result<()> { + match &embed_ref { + RemoteRefEmbedType::Xmp(_) => { + let mut input_stream = File::open(asset_path)?; + let mut output_stream = Cursor::new(Vec::new()); + self.embed_reference_to_stream(&mut input_stream, &mut output_stream, embed_ref)?; + 
fs::write(asset_path, output_stream.into_inner())?; + Ok(()) + } + _ => Err(Error::UnsupportedType), + } + } + + fn embed_reference_to_stream( + &self, + source_stream: &mut dyn CAIRead, + output_stream: &mut dyn CAIReadWrite, + embed_ref: RemoteRefEmbedType, + ) -> Result<()> { + match embed_ref { + RemoteRefEmbedType::Xmp(url) => { + source_stream.rewind()?; + + let header = ID3V2Header::read_header(source_stream)?; + source_stream.rewind()?; + + let mut out_tag = Tag::new(); + + let reader = CAIReadWrapper { + reader: source_stream, + }; + if let Ok(tag) = Tag::read_from(reader) { + for f in tag.frames() { + match f.content() { + Content::Private(private) => { + if &private.owner_identifier != "XMP" { + out_tag.add_frame(f.clone()); + } + } + _ => { + out_tag.add_frame(f.clone()); + } + } + } + } + + let xmp = xmp_inmemory_utils::add_provenance( + &self + .read_xmp(source_stream) + .unwrap_or_else(|| format!("http://ns.adobe.com/xap/1.0/\0 {}", MIN_XMP)), + &url, + )?; + let frame = Frame::with_content( + "PRIV", + Content::Private(Private { + // Null-terminated + owner_identifier: "XMP\0".to_owned(), + private_data: xmp.into_bytes(), + }), + ); + + out_tag.add_frame(frame); + + let writer = CAIReadWriteWrapper { + reader_writer: output_stream, + }; + out_tag + .write_to(writer, Version::Id3v24) + .map_err(|_e| Error::EmbeddingError)?; + + source_stream.seek(SeekFrom::Start(header.get_size() as u64))?; + std::io::copy(source_stream, output_stream)?; + + Ok(()) + } + _ => Err(Error::UnsupportedType), + } + } +} + +fn add_required_frame( + asset_type: &str, + input_stream: &mut dyn CAIRead, + output_stream: &mut dyn CAIReadWrite, +) -> Result<()> { + let mp3io = Mp3IO::new(asset_type); + + input_stream.rewind()?; + + match mp3io.read_cai(input_stream) { + Ok(_) => { + // just clone + input_stream.rewind()?; + output_stream.rewind()?; + std::io::copy(input_stream, output_stream)?; + Ok(()) + } + Err(_) => { + input_stream.rewind()?; + mp3io.write_cai(input_stream, 
output_stream, &[1, 2, 3, 4]) // save arbitrary data + } + } +} + +impl AssetIO for Mp3IO { + fn new(mp3_format: &str) -> Self { + Mp3IO { + _mp3_format: mp3_format.to_string(), + } + } + + fn get_handler(&self, asset_type: &str) -> Box { + Box::new(Mp3IO::new(asset_type)) + } + + fn get_reader(&self) -> &dyn CAIReader { + self + } + + fn get_writer(&self, asset_type: &str) -> Option> { + Some(Box::new(Mp3IO::new(asset_type))) + } + + fn asset_patch_ref(&self) -> Option<&dyn AssetPatch> { + Some(self) + } + + fn read_cai_store(&self, asset_path: &Path) -> Result> { + let mut f = File::open(asset_path)?; + self.read_cai(&mut f) + } + + fn save_cai_store(&self, asset_path: &std::path::Path, store_bytes: &[u8]) -> Result<()> { + let mut input_stream = std::fs::OpenOptions::new() + .read(true) + .write(true) + .open(asset_path) + .map_err(Error::IoError)?; + + let mut temp_file = Builder::new() + .prefix("c2pa_temp") + .rand_bytes(5) + .tempfile()?; + + self.write_cai(&mut input_stream, &mut temp_file, store_bytes)?; + + // copy temp file to asset + rename_or_move(temp_file, asset_path) + } + + fn get_object_locations( + &self, + asset_path: &std::path::Path, + ) -> Result> { + let mut f = std::fs::File::open(asset_path).map_err(|_err| Error::EmbeddingError)?; + + self.get_object_locations_from_stream(&mut f) + } + + fn remove_cai_store(&self, asset_path: &Path) -> Result<()> { + self.save_cai_store(asset_path, &[]) + } + + fn remote_ref_writer_ref(&self) -> Option<&dyn RemoteRefEmbed> { + Some(self) + } + + fn supported_types(&self) -> &[&str] { + &SUPPORTED_TYPES + } +} + +impl CAIWriter for Mp3IO { + fn write_cai( + &self, + input_stream: &mut dyn CAIRead, + output_stream: &mut dyn CAIReadWrite, + store_bytes: &[u8], + ) -> Result<()> { + input_stream.rewind()?; + + let header = ID3V2Header::read_header(input_stream)?; + input_stream.rewind()?; + + let mut out_tag = Tag::new(); + + // wrapper to protect input stream from being gobbled + let reader = CAIReadWrapper { 
+ reader: input_stream, + }; + + if let Ok(tag) = Tag::read_from(reader) { + for f in tag.frames() { + match f.content() { + // remove existing manifest keeping existing frames + Content::EncapsulatedObject(eo) => { + if eo.mime_type != "application/x-c2pa-manifest-store" { + out_tag.add_frame(f.clone()); + } + } + _ => { + out_tag.add_frame(f.clone()); + } + } + } + } + + // only add new tags + if !store_bytes.is_empty() { + // Add new manifest store + let frame = Frame::with_content( + "GEOB", + Content::EncapsulatedObject(EncapsulatedObject { + mime_type: GEOB_FRAME_MIME_TYPE.to_string(), + filename: GEOB_FRAME_FILE_NAME.to_string(), + description: GEOB_FRAME_DESCRIPTION.to_string(), + data: store_bytes.to_vec(), + }), + ); + + out_tag.add_frame(frame); + } + + // wrapper to protect output stream from being gobbled + let writer = CAIReadWriteWrapper { + reader_writer: output_stream, + }; + + // write new tag to output stream + out_tag + .write_to(writer, Version::Id3v24) + .map_err(|_e| Error::EmbeddingError)?; + + // skip past old ID3V2 + input_stream.seek(SeekFrom::Start(header.get_size() as u64))?; + + // copy source data to output + std::io::copy(input_stream, output_stream)?; + + Ok(()) + } + + fn get_object_locations_from_stream( + &self, + input_stream: &mut dyn CAIRead, + ) -> Result> { + let output_buf: Vec = Vec::new(); + let mut output_stream = Cursor::new(output_buf); + + add_required_frame(&self._mp3_format, input_stream, &mut output_stream)?; + + let mut positions: Vec = Vec::new(); + + let (manifest_pos, manifest_len) = + get_manifest_pos(&mut output_stream).ok_or(Error::EmbeddingError)?; + + positions.push(HashObjectPositions { + offset: usize::value_from(manifest_pos) + .map_err(|_err| Error::InvalidAsset("value out of range".to_string()))?, + length: usize::value_from(manifest_len) + .map_err(|_err| Error::InvalidAsset("value out of range".to_string()))?, + htype: HashBlockObjectType::Cai, + }); + + // add hash of chunks before cai + 
positions.push(HashObjectPositions { + offset: 0, + length: usize::value_from(manifest_pos) + .map_err(|_err| Error::InvalidAsset("value out of range".to_string()))?, + htype: HashBlockObjectType::Other, + }); + + // add position from cai to end + let end = u64::value_from(manifest_pos) + .map_err(|_err| Error::InvalidAsset("value out of range".to_string()))? + + u64::value_from(manifest_len) + .map_err(|_err| Error::InvalidAsset("value out of range".to_string()))?; + let file_end = output_stream.seek(SeekFrom::End(0))?; + positions.push(HashObjectPositions { + offset: usize::value_from(end) + .map_err(|_err| Error::InvalidAsset("value out of range".to_string()))?, // len of cai + length: usize::value_from(file_end - end) + .map_err(|_err| Error::InvalidAsset("value out of range".to_string()))?, + htype: HashBlockObjectType::Other, + }); + + Ok(positions) + } + + fn remove_cai_store_from_stream( + &self, + input_stream: &mut dyn CAIRead, + output_stream: &mut dyn CAIReadWrite, + ) -> Result<()> { + self.write_cai(input_stream, output_stream, &[]) + } +} + +impl AssetPatch for Mp3IO { + fn patch_cai_store(&self, asset_path: &std::path::Path, store_bytes: &[u8]) -> Result<()> { + let mut asset = OpenOptions::new() + .write(true) + .read(true) + .create(false) + .open(asset_path)?; + + let (manifest_pos, manifest_len) = + get_manifest_pos(&mut asset).ok_or(Error::EmbeddingError)?; + + if store_bytes.len() == manifest_len as usize { + asset.seek(SeekFrom::Start(manifest_pos))?; + asset.write_all(store_bytes)?; + Ok(()) + } else { + Err(Error::InvalidAsset( + "patch_cai_store store size mismatch.".to_string(), + )) + } + } +} + +#[cfg(test)] +pub mod tests { + #![allow(clippy::expect_used)] + #![allow(clippy::panic)] + #![allow(clippy::unwrap_used)] + + use tempfile::tempdir; + + use super::*; + use crate::utils::{ + hash_utils::vec_compare, + test::{fixture_path, temp_dir_path}, + }; + + #[test] + fn test_write_mp3() { + let more_data = "some more test 
data".as_bytes(); + let source = fixture_path("sample1.mp3"); + + let mut success = false; + if let Ok(temp_dir) = tempdir() { + let output = temp_dir_path(&temp_dir, "sample1-mp3.mp3"); + + if let Ok(_size) = std::fs::copy(source, &output) { + let mp3_io = Mp3IO::new("mp3"); + + if let Ok(()) = mp3_io.save_cai_store(&output, more_data) { + if let Ok(read_test_data) = mp3_io.read_cai_store(&output) { + assert!(vec_compare(more_data, &read_test_data)); + success = true; + } + } + } + } + assert!(success) + } + + #[test] + fn test_patch_write_mp3() { + let test_data = "some test data".as_bytes(); + let source = fixture_path("sample1.mp3"); + + let mut success = false; + if let Ok(temp_dir) = tempdir() { + let output = temp_dir_path(&temp_dir, "sample1-mp3.mp3"); + + if let Ok(_size) = std::fs::copy(source, &output) { + let mp3_io = Mp3IO::new("mp3"); + + if let Ok(()) = mp3_io.save_cai_store(&output, test_data) { + if let Ok(source_data) = mp3_io.read_cai_store(&output) { + // create replacement data of same size + let mut new_data = vec![0u8; source_data.len()]; + new_data[..test_data.len()].copy_from_slice(test_data); + mp3_io.patch_cai_store(&output, &new_data).unwrap(); + + let replaced = mp3_io.read_cai_store(&output).unwrap(); + + assert_eq!(new_data, replaced); + + success = true; + } + } + } + } + assert!(success) + } + + #[test] + fn test_remove_c2pa() { + let source = fixture_path("sample1.mp3"); + + let temp_dir = tempdir().unwrap(); + let output = temp_dir_path(&temp_dir, "sample1-mp3.mp3"); + + std::fs::copy(source, &output).unwrap(); + let mp3_io = Mp3IO::new("wav"); + + mp3_io.remove_cai_store(&output).unwrap(); + + // read back in asset, JumbfNotFound is expected since it was removed + match mp3_io.read_cai_store(&output) { + Err(Error::JumbfNotFound) => (), + _ => unreachable!(), + } + } + + #[test] + fn test_remote_ref() -> Result<()> { + let mp3_io = Mp3IO::new("mp3"); + + let mut stream = File::open(fixture_path("sample1.mp3"))?; + 
assert!(mp3_io.read_xmp(&mut stream).is_none()); + stream.rewind()?; + + let mut output_stream1 = Cursor::new(Vec::new()); + mp3_io.embed_reference_to_stream( + &mut stream, + &mut output_stream1, + RemoteRefEmbedType::Xmp("Test".to_owned()), + )?; + output_stream1.rewind()?; + + let xmp = mp3_io.read_xmp(&mut output_stream1); + assert_eq!(xmp, Some("http://ns.adobe.com/xap/1.0/\0\n\n \n \n \n \n".to_owned())); + + Ok(()) + } +} diff --git a/sdk/crates/c2pa-codecs/src/codecs/pdf.rs b/sdk/crates/c2pa-codecs/src/codecs/pdf.rs new file mode 100644 index 000000000..0048a5451 --- /dev/null +++ b/sdk/crates/c2pa-codecs/src/codecs/pdf.rs @@ -0,0 +1,792 @@ +// Copyright 2023 Adobe. All rights reserved. +// This file is licensed to you under the Apache License, +// Version 2.0 (http://www.apache.org/licenses/LICENSE-2.0) +// or the MIT license (http://opensource.org/licenses/MIT), +// at your option. + +// Unless required by applicable law or agreed to in writing, +// this software is distributed on an "AS IS" BASIS, WITHOUT +// WARRANTIES OR REPRESENTATIONS OF ANY KIND, either express or +// implied. See the LICENSE-MIT and LICENSE-APACHE files for the +// specific language governing permissions and limitations under +// each license. + +// TODO: Remove this after we finish the PDF write feature. 
#![allow(dead_code)]

use std::io::{Read, Write};

use lopdf::{
    dictionary, Document, Object,
    Object::{Array, Integer, Name, Reference},
    ObjectId, Stream,
};
use thiserror::Error;

// Dictionary keys and marker values used when reading and writing the C2PA
// manifest inside a PDF.
// Associated File Relationship
static AF_RELATIONSHIP_KEY: &[u8] = b"AFRelationship";
static ANNOTATIONS_KEY: &[u8] = b"Annots";
static ASSOCIATED_FILE_KEY: &[u8] = b"AF";
// Value of `AFRelationship` that marks a file specification as the C2PA manifest.
static C2PA_RELATIONSHIP: &[u8] = b"C2PA_Manifest";
// Display name used for the manifest's file specification, annotation, and
// embedded-files entry.
static CONTENT_CREDS: &str = "Content Credentials";
static EMBEDDED_FILES_KEY: &[u8] = b"EmbeddedFiles";
static SUBTYPE_KEY: &[u8] = b"Subtype";
static TYPE_KEY: &[u8] = b"Type";
static NAMES_KEY: &[u8] = b"Names";

/// Error representing failure scenarios while interacting with PDFs.
#[derive(Debug, Error)]
pub enum Error {
    /// Error occurred while reading the PDF. Look into the wrapped `lopdf::Error` for more
    /// information on the cause.
    #[error(transparent)]
    UnableToReadPdf(#[from] lopdf::Error),

    /// No Manifest is present in the PDF.
    #[error("No manifest is present in the PDF.")]
    NoManifest,

    /// Error occurred while adding a C2PA manifest as an `Annotation` to the PDF.
    #[error("Unable to add C2PA manifest as an annotation to the PDF.")]
    AddingAnnotation,

    /// The PDF has an `AFRelationship` set to C2PA, but we were unable to find
    /// the manifest bytes in the PDF's embedded files.
    #[error("Unable to find C2PA manifest in the PDF's embedded files.")]
    UnableToFindEmbeddedFileManifest,

    /// An error was encountered while trying to find the PDF's C2PA embedded file
    /// specification in the array of Associated Files defined in the catalog.
    #[error("Unable to find a C2PA embedded file specification in PDF's associated files array")]
    FindingC2PAFileSpec,
}

// MIME type recorded as the subtype of the embedded manifest stream.
const C2PA_MIME_TYPE: &str = "application/x-c2pa-manifest-store";

#[cfg_attr(test, mockall::automock)]
pub(crate) trait C2paPdf: Sized {
    /// Save the `C2paPdf` implementation to the provided `writer`.
+ fn save_to(&mut self, writer: &mut W) -> Result<(), std::io::Error>; + + /// Returns `true` if the `PDF` is password protected, `false` otherwise. + fn is_password_protected(&self) -> bool; + + /// Returns `true` if this PDF has C2PA Manifests, `false` otherwise. + fn has_c2pa_manifest(&self) -> bool; + + /// Writes provided `bytes` as a PDF `Embedded File` + fn write_manifest_as_embedded_file(&mut self, bytes: Vec) -> Result<(), Error>; + + /// Writes provided `bytes` as a PDF `Annotation`. + fn write_manifest_as_annotation(&mut self, vec: Vec) -> Result<(), Error>; + + /// Returns a reference to the C2PA manifest bytes. + #[allow(clippy::needless_lifetimes)] // required for automock::mockall + fn read_manifest_bytes<'a>(&'a self) -> Result>, Error>; + + fn remove_manifest_bytes(&mut self) -> Result<(), Error>; + + fn read_xmp(&self) -> Option; +} + +pub(crate) struct Pdf { + document: Document, +} + +impl C2paPdf for Pdf { + /// Saves the in-memory PDF to the provided `writer`. + fn save_to(&mut self, writer: &mut W) -> Result<(), std::io::Error> { + self.document.save_to(writer) + } + + fn is_password_protected(&self) -> bool { + self.document.is_encrypted() + } + + /// Determines if this PDF has a C2PA manifest embedded. + /// + /// This is done by checking if the Associated File key of the catalog points to a + /// [Object::Dictionary] with an `AFRelationship` set to `C2PA_Manifest`. + fn has_c2pa_manifest(&self) -> bool { + self.c2pa_file_spec_object_id().is_some() + } + + /// Writes the provided `bytes` to the PDF as an `EmbeddedFile`. + fn write_manifest_as_embedded_file(&mut self, bytes: Vec) -> Result<(), Error> { + // Add `FileStream` and `FileSpec` to the PDF. 
+ let file_stream_ref = self.add_c2pa_embedded_file_stream(bytes); + let file_spec_ref = self.add_embedded_file_specification(file_stream_ref); + + self.push_associated_file(file_spec_ref)?; + + let mut manifest_name_file_pair = vec![ + Object::string_literal(CONTENT_CREDS), + Reference(file_spec_ref), + ]; + + let Ok(catalog_names) = self.document.catalog_mut()?.get_mut(NAMES_KEY) else { + // No /Names key exists in the Catalog. We can safely add the /Names key and construct + // the remaining objects. + // Add /EmbeddedFiles dictionary as indirect object. + let embedded_files_ref = self.document.add_object(dictionary! { + NAMES_KEY => manifest_name_file_pair + }); + + // Add /Names dictionary as indirect object + let names_ref = self.document.add_object(dictionary! { + EMBEDDED_FILES_KEY => Reference(embedded_files_ref) + }); + + // Set /Names key in `Catalog` to reference above indirect object names dictionary. + self.document.catalog_mut()?.set(NAMES_KEY, names_ref); + return Ok(()); + }; + + // Follows the Reference to the /EmbeddedFiles Dictionary, if the Object is a Reference. + let names_dictionary = match catalog_names.as_reference() { + Ok(object_id) => self.document.get_object_mut(object_id)?.as_dict_mut()?, + _ => catalog_names.as_dict_mut()?, + }; + + let Ok(embedded_files) = names_dictionary.get_mut(EMBEDDED_FILES_KEY) else { + // We have a /Names dictionary, but are missing the /EmbeddedFiles dictionary + // and its /Names array of embedded files. + names_dictionary.set( + EMBEDDED_FILES_KEY, + dictionary! { NAMES_KEY => manifest_name_file_pair }, + ); + return Ok(()); + }; + + // Follows the reference to the /EmbeddedFiles Dictionary, if the Object is a Reference. 
+ let embedded_files_dictionary = match embedded_files.as_reference() { + Ok(object_id) => self.document.get_object_mut(object_id)?.as_dict_mut()?, + _ => embedded_files.as_dict_mut()?, + }; + + let Ok(names) = embedded_files_dictionary.get_mut(NAMES_KEY) else { + // This PDF has the /Names dictionary, and it has the /EmbeddedFiles + // dictionary, but the /EmbeddedFiles Dictionary is missing the /Names Array. + embedded_files_dictionary.set( + NAMES_KEY, + dictionary! { NAMES_KEY => manifest_name_file_pair }, + ); + + return Ok(()); + }; + + // Follows the reference to the /Names Array, if the Object is a Reference. + let names_array = match names.as_reference() { + Ok(object_id) => self.document.get_object_mut(object_id)?.as_array_mut()?, + _ => names.as_array_mut()?, + }; + + // The PDF has the /Names dictionary, which contains the /EmbeddedFiles Dictionary, which + // contains the /Names array. Append the manifest's name (Content Credentials) + // and its reference. + names_array.append(&mut manifest_name_file_pair); + + Ok(()) + } + + /// Writes the provided bytes to the PDF as a `FileAttachment` `Annotation`. This `Annotation` + /// is added to the first page of the `PDF`, to the lower left corner. + fn write_manifest_as_annotation(&mut self, bytes: Vec) -> Result<(), Error> { + let file_stream_reference = self.add_c2pa_embedded_file_stream(bytes); + let file_spec_reference = self.add_embedded_file_specification(file_stream_reference); + + self.push_associated_file(file_spec_reference)?; + self.add_file_attachment_annotation(file_spec_reference)?; + + Ok(()) + } + + /// Gets a reference to the `C2PA` manifest bytes of the PDF. + /// + /// This method will read the bytes of the manifest, whether the manifest was added to the + /// PDF via an `Annotation` or an `EmbeddedFile`. + /// + /// Returns an `Ok(None)` if no manifest is present. Returns a `Ok(Some(Vec<&[u8]>))` when a manifest + /// is present. 
+ /// + /// ### Note: + /// + /// A `Vec<&[u8]>` is returned because it's possible for a PDF's manifests to be stored + /// separately, due to PDF's "Incremental Update" feature. See the spec for more details: + /// + fn read_manifest_bytes(&self) -> Result>, Error> { + let Some(id) = self.c2pa_file_spec_object_id() else { + return Ok(None); + }; + + let ef = &self + .document + .get_object(id) + .and_then(Object::as_dict)? + .get_deref(b"EF", &self.document)? + .as_dict()?; // EF dictionary + + Ok(Some(vec![ + &ef.get_deref(b"F", &self.document)? // F embedded file stream + .as_stream()? + .content, + ])) + } + + fn remove_manifest_bytes(&mut self) -> Result<(), Error> { + if !self.has_c2pa_manifest() { + return Err(Error::NoManifest); + } + + // Find the File Spec, which contains the reference to the manifest. + let file_spec_ref = self + .c2pa_file_spec_object_id() + .ok_or_else(|| Error::NoManifest)?; + + // Find the manifest's file stream. + let file_stream_ef_ref = self + .document + .get_object(file_spec_ref)? + .as_dict()? + .get(b"EF")?; + + let file_stream_ref = file_stream_ef_ref.as_dict()?.get(b"F")?.as_reference()?; + + // Attempt to remove the manifest from the PDF's `Embedded Files`s. If the manifest + // isn't in the PDF's embedded files, remove the manifest from the PDF's annotations. + // + // We do the operation in this order because a PDF's annotations are attached to a page. + // It's possible we'd have to iterate over every page of the PDF before determining the + // manifest is referenced from an Embedded File instead. + self.remove_manifest_from_embedded_files() + .or_else(|_| self.remove_manifest_from_annotations())?; + + // Remove C2PA associated files from the `AF` key in the catalog. 
+ self.remove_c2pa_file_spec_reference()?; + + // Delete the manifest and its descriptor from the PDF + self.document.delete_object(file_stream_ref); + self.document.delete_object(file_spec_ref); + + Ok(()) + } + + /// Reads the `Metadata` field referenced in the PDF document's `Catalog` entry. Will return + /// `None` if no Metadata is present. + fn read_xmp(&self) -> Option { + self.document + .catalog() + .and_then(|catalog| catalog.get_deref(b"Metadata", &self.document)) + .and_then(Object::as_stream) + .ok() + .and_then(|stream_dict| { + let Ok(subtype_str) = stream_dict + .dict + .get_deref(SUBTYPE_KEY, &self.document) + .and_then(Object::as_name_str) + else { + return None; + }; + + if subtype_str.to_lowercase() != "xml" { + return None; + } + + String::from_utf8(stream_dict.content.clone()).ok() + }) + } +} + +impl Pdf { + #[allow(dead_code)] + pub fn from_bytes(bytes: &[u8]) -> Result { + let document = Document::load_mem(bytes)?; + Ok(Self { document }) + } + + pub fn from_reader(source: R) -> Result { + let document = Document::load_from(source)?; + Ok(Self { document }) + } + + /// Returns a reference to the Associated Files array from the PDF's Catalog. + fn associated_files(&self) -> Result<&Vec, Error> { + Ok(self + .document + .catalog()? + .get_deref(ASSOCIATED_FILE_KEY, &self.document)? + .as_array()?) + } + + /// Returns the [Object::ObjectId] of the C2PA File Spec Reference, if it is present in the + /// PDF's associated files array. 
+ fn c2pa_file_spec_object_id(&self) -> Option { + self.associated_files().ok()?.iter().find_map(|value| { + let Ok(reference) = value.as_reference() else { + return None; + }; + + let name = self + .document + .get_object(reference) + .and_then(Object::as_dict) + .and_then(|dict| dict.get_deref(AF_RELATIONSHIP_KEY, &self.document)) + .and_then(Object::as_name) + .ok()?; + + (name == C2PA_RELATIONSHIP).then_some(reference) + }) + } + + /// Removes the C2PA File Spec Reference if it exists in the Associated Files [Object::Array] of + /// PDF's catalog. This will return an [Err] if the PDF doesn't contain a C2PA File Spec + /// Reference. + fn remove_c2pa_file_spec_reference(&mut self) -> Result<(), Error> { + let c2pa_file_spec_reference = self + .c2pa_file_spec_object_id() + .ok_or_else(|| Error::FindingC2PAFileSpec)?; + + self.document + .catalog_mut()? + .get_mut(ASSOCIATED_FILE_KEY)? + .as_array_mut()? + .retain(|v| { + let Ok(reference) = v.as_reference() else { + return true; + }; + + reference != c2pa_file_spec_reference + }); + + Ok(()) + } + + /// Adds the C2PA `Annotation` to the PDF. + /// + /// ### Note: + /// The `FileAttachment` annotation is added to the first page of the PDF in the lower + /// left-hand corner. The `FileAttachment`'s location is not defined in the spec as of version + /// `1.3`. + fn add_file_attachment_annotation( + &mut self, + file_spec_reference: ObjectId, + ) -> Result<(), Error> { + let annotation = dictionary! { + "Type" => Name("Annot".into()), + "Contents" => Object::string_literal(CONTENT_CREDS), + "Name" => Object::string_literal(CONTENT_CREDS), + SUBTYPE_KEY => Name("FileAttachment".into()), + "FS" => Reference(file_spec_reference), + // Places annotation in the lower left-hand corner. The icon will be 10x10. + "Rect" => vec![0.into(), 0.into(), 10.into(), 10.into()], + }; + + // Add C2PA annotation as an indirect object. 
+ let annotation_ref = self.document.add_object(annotation); + + // Find the reference to the first page of the PDF. + let first_page_ref = self + .document + .page_iter() + .next() + .ok_or_else(|| Error::AddingAnnotation)?; + + // Get a mutable ref to the first page as a Dictionary object. + let first_page = self + .document + .get_object_mut(first_page_ref)? + .as_dict_mut()?; + + // Ensures the /Annots array exists on the page object. + if !first_page.has(ANNOTATIONS_KEY) { + first_page.set(ANNOTATIONS_KEY, Array(vec![])) + } + + // Follows a reference to the indirect annotations array, if it exists. + let annotation_object = first_page.get_mut(ANNOTATIONS_KEY)?; + let annotations = if let Ok(v) = annotation_object.as_reference() { + self.document.get_object_mut(v)? + } else { + annotation_object + } + .as_array_mut()?; + + annotations.push(Reference(annotation_ref)); + Ok(()) + } + + /// Creates, or appends to, the Associated File (`AF`) array the embedded file spec reference of the + /// C2PA data. + fn push_associated_file(&mut self, embedded_file_spec_ref: ObjectId) -> Result<(), Error> { + let catalog = self.document.catalog_mut()?; + if catalog.get_mut(ASSOCIATED_FILE_KEY).is_err() { + // Add associated files array to catalog if it isn't already present. + catalog.set(ASSOCIATED_FILE_KEY, vec![]); + } + + let associated_files = catalog.get_mut(ASSOCIATED_FILE_KEY)?; + let associated_files = match associated_files.as_reference() { + Ok(object_id) => self.document.get_object_mut(object_id)?, + _ => associated_files, + } + .as_array_mut()?; + + associated_files.push(Reference(embedded_file_spec_ref)); + + Ok(()) + } + + /// Adds the `Embedded File Specification` to the PDF document. Returns the [Reference] + /// to the added `Embedded File Specification`. + fn add_embedded_file_specification(&mut self, file_stream_ref: ObjectId) -> ObjectId { + let embedded_file_stream = dictionary! 
{ + AF_RELATIONSHIP_KEY => Name(C2PA_RELATIONSHIP.into()), + "Desc" => Object::string_literal(CONTENT_CREDS), + "F" => Object::string_literal(CONTENT_CREDS), + "EF" => dictionary! { + "F" => Reference(file_stream_ref), + }, + TYPE_KEY => Name("FileSpec".into()), + "UF" => Object::string_literal(CONTENT_CREDS), + }; + + self.document.add_object(embedded_file_stream) + } + + /// Adds the provided `bytes` as a `StreamDictionary` to the PDF document. Returns the + /// [Reference] of the added [Object]. + fn add_c2pa_embedded_file_stream(&mut self, bytes: Vec) -> ObjectId { + let stream = Stream::new( + dictionary! { + "F" => dictionary! { + SUBTYPE_KEY => C2PA_MIME_TYPE, + "Length" => Integer(bytes.len() as i64), + }, + }, + bytes, + ); + + self.document.add_object(stream) + } + + /// Remove the C2PA Manifest `Annotation` from the PDF. + fn remove_manifest_from_annotations(&mut self) -> Result<(), Error> { + for (_, page_id) in self.document.get_pages() { + self.document + .get_object_mut(page_id)? + .as_dict_mut()? + .get_mut(ANNOTATIONS_KEY)? + .as_array_mut()? + .retain(|obj| { + obj.as_dict() + .and_then(|annot| annot.get(TYPE_KEY)) + .and_then(Object::as_name_str) + .map(|str| str != CONTENT_CREDS) + .unwrap_or(true) + }); + } + + Ok(()) + } + + /// Removes the manifest from the PDF's embedded files collection. + fn remove_manifest_from_embedded_files(&mut self) -> Result<(), Error> { + let Ok(names) = self.document.catalog_mut()?.get_mut(NAMES_KEY) else { + return Err(Error::NoManifest); + }; + + // Follows the reference to the /Names Dictionary. + let names_dictionary = match names.as_reference() { + Ok(object_id) => self.document.get_object_mut(object_id)?.as_dict_mut()?, + _ => names.as_dict_mut()?, + }; + + // Follows the reference to the /EmbeddedFiles Dictionary. 
+ let embedded_files_object = names_dictionary.get_mut(EMBEDDED_FILES_KEY)?; + let embedded_files_dictionary = match embedded_files_object.as_reference() { + Ok(object_id) => self.document.get_object_mut(object_id)?.as_dict_mut()?, + _ => embedded_files_object.as_dict_mut()?, + }; + + // Gets the /Names array from the /EmbeddedFiles Dictionary. This will contain the reference + // to the C2PA manifest. + let names_vector_object = embedded_files_dictionary.get_mut(NAMES_KEY)?; + let names_vector = match names_vector_object.as_reference() { + Ok(object_id) => self.document.get_object_mut(object_id)?.as_array_mut()?, + _ => names_vector_object.as_array_mut()?, + }; + + // Find the "Content Credentials" marker name in the /Names Array. + let content_creds_marker_idx = names_vector + .iter() + .position(|value| { + value + .as_string() + .map(|value| value == CONTENT_CREDS) + .unwrap_or_default() + }) + .ok_or_else(|| Error::UnableToFindEmbeddedFileManifest)?; + + let content_creds_reference_idx = content_creds_marker_idx + 1; + if content_creds_reference_idx >= names_vector.len() { + return Err(Error::UnableToFindEmbeddedFileManifest); + } + + // Delete the "Content Credentials" marker object and the reference to the C2PA + // manifest in the PDF's embedded files. 
+ names_vector.drain(content_creds_marker_idx..=content_creds_reference_idx); + + Ok(()) + } +} + +#[cfg(test)] +mod tests { + #![allow(clippy::unwrap_used)] + + use super::*; + + #[cfg(target_arch = "wasm32")] + wasm_bindgen_test::wasm_bindgen_test_configure!(run_in_browser); + + #[cfg(target_arch = "wasm32")] + use wasm_bindgen_test::*; + + #[cfg_attr(not(target_arch = "wasm32"), test)] + #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] + fn test_loads_pdf_from_bytes() { + let bytes = include_bytes!("../../tests/fixtures/basic.pdf"); + let pdf_result = Pdf::from_bytes(bytes); + assert!(pdf_result.is_ok()); + } + + #[cfg_attr(not(target_arch = "wasm32"), test)] + #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] + fn test_loads_pdf_from_bytes_with_invalid_file() { + let bytes = include_bytes!("../../tests/fixtures/XCA.jpg"); + let pdf_result = Pdf::from_bytes(bytes); + assert!(matches!(pdf_result, Err(Error::UnableToReadPdf(_)))); + } + + #[cfg_attr(not(target_arch = "wasm32"), test)] + #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] + fn test_is_password_protected() { + let bytes = include_bytes!("../../tests/fixtures/basic-password.pdf"); + let pdf_result = Pdf::from_bytes(bytes).unwrap(); + assert!(pdf_result.is_password_protected()); + + let bytes = include_bytes!("../../tests/fixtures/basic.pdf"); + let pdf = Pdf::from_bytes(bytes).unwrap(); + assert!(!pdf.is_password_protected()); + } + + #[cfg_attr(not(target_arch = "wasm32"), test)] + #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] + fn test_has_c2pa_manifest_on_file_without_manifest() { + let bytes = include_bytes!("../../tests/fixtures/basic.pdf"); + let pdf = Pdf::from_bytes(bytes).unwrap(); + assert!(!pdf.has_c2pa_manifest()) + } + + #[cfg_attr(not(target_arch = "wasm32"), test)] + #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] + fn test_has_c2pa_manifest_on_file_with_manifest() { + let bytes = include_bytes!("../../tests/fixtures/basic.pdf"); + let mut pdf = 
Pdf::from_bytes(bytes).unwrap(); + assert!(!pdf.has_c2pa_manifest()); + + pdf.write_manifest_as_annotation(vec![0u8, 1u8]).unwrap(); + assert!(pdf.has_c2pa_manifest()); + } + + #[cfg_attr(not(target_arch = "wasm32"), test)] + #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] + fn test_adds_embedded_file_spec_to_pdf_stream() { + let bytes = include_bytes!("../../tests/fixtures/express.pdf"); + let mut pdf = Pdf::from_bytes(bytes).unwrap(); + let object_count_before_add = pdf.document.objects.len(); + + let bytes = vec![10u8]; + let id = pdf.add_c2pa_embedded_file_stream(bytes.clone()); + + // Object added to the PDF's object collection. + assert_eq!(object_count_before_add + 1, pdf.document.objects.len()); + + // We are able to find the object. + let stream = pdf.document.get_object(id); + assert_eq!(stream.unwrap().as_stream().unwrap().content, bytes); + } + + #[cfg_attr(not(target_arch = "wasm32"), test)] + #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] + fn test_write_manifest_as_annotation() { + let mut pdf = Pdf::from_bytes(include_bytes!("../../tests/fixtures/express.pdf")).unwrap(); + assert!(!pdf.has_c2pa_manifest()); + pdf.write_manifest_as_annotation(vec![10u8, 20u8]).unwrap(); + assert!(pdf.has_c2pa_manifest()); + } + + #[cfg_attr(not(target_arch = "wasm32"), test)] + #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] + fn test_write_manifest_bytes_to_pdf_with_existing_annotations() { + let mut pdf = + Pdf::from_bytes(include_bytes!("../../tests/fixtures/basic-annotation.pdf")).unwrap(); + pdf.write_manifest_as_annotation(vec![10u8, 20u8]).unwrap(); + assert!(pdf.has_c2pa_manifest()); + } + + #[cfg_attr(not(target_arch = "wasm32"), test)] + #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] + fn test_add_manifest_to_embedded_files() { + let mut pdf = Pdf::from_bytes(include_bytes!("../../tests/fixtures/basic.pdf")).unwrap(); + pdf.write_manifest_as_embedded_file(vec![10u8, 20u8]) + .unwrap(); + + assert!(pdf.has_c2pa_manifest()); + } 
+ + #[cfg_attr(not(target_arch = "wasm32"), test)] + #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] + fn test_add_manifest_to_embedded_files_attachments_present() { + let mut pdf = + Pdf::from_bytes(include_bytes!("../../tests/fixtures/basic-attachments.pdf")).unwrap(); + pdf.write_manifest_as_embedded_file(vec![10u8, 20u8]) + .unwrap(); + + assert!(pdf.has_c2pa_manifest()); + } + + #[cfg_attr(not(target_arch = "wasm32"), test)] + #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] + fn test_save_to() { + let mut pdf = Pdf::from_bytes(include_bytes!("../../tests/fixtures/basic.pdf")).unwrap(); + assert!(!pdf.has_c2pa_manifest()); + + pdf.write_manifest_as_annotation(vec![10u8]).unwrap(); + assert!(pdf.has_c2pa_manifest()); + + let mut saved_bytes = vec![]; + pdf.save_to(&mut saved_bytes).unwrap(); + + let saved_pdf = Pdf::from_bytes(&saved_bytes).unwrap(); + assert!(saved_pdf.has_c2pa_manifest()); + } + + #[cfg_attr(not(target_arch = "wasm32"), test)] + #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] + fn test_reads_manifest_bytes_for_embedded_files_manifest() { + let mut pdf = Pdf::from_bytes(include_bytes!("../../tests/fixtures/express.pdf")).unwrap(); + assert!(!pdf.has_c2pa_manifest()); + + let manifest_bytes = vec![0u8, 1u8, 1u8, 2u8, 3u8]; + pdf.write_manifest_as_embedded_file(manifest_bytes.clone()) + .unwrap(); + + assert!(pdf.has_c2pa_manifest()); + assert!(matches!( + pdf.read_manifest_bytes(), + Ok(Some(manifests)) if manifests[0] == manifest_bytes + )); + } + + #[cfg_attr(not(target_arch = "wasm32"), test)] + #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] + fn test_reads_manifest_bytes_for_annotation_manifest() { + let mut pdf = Pdf::from_bytes(include_bytes!("../../tests/fixtures/basic.pdf")).unwrap(); + assert!(!pdf.has_c2pa_manifest()); + + let manifest_bytes = vec![0u8, 1u8, 1u8, 2u8, 3u8]; + pdf.write_manifest_as_annotation(manifest_bytes.clone()) + .unwrap(); + + assert!(pdf.has_c2pa_manifest()); + assert!(matches!( + 
pdf.read_manifest_bytes(), + Ok(Some(manifests)) if manifests[0] == manifest_bytes + )); + } + + #[cfg_attr(not(target_arch = "wasm32"), test)] + #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] + fn test_read_manifest_bytes_from_pdf_without_bytes_returns_none() { + let pdf = Pdf::from_bytes(include_bytes!("../../tests/fixtures/basic.pdf")).unwrap(); + assert!(!pdf.has_c2pa_manifest()); + assert!(matches!(pdf.read_manifest_bytes(), Ok(None))); + } + + #[cfg_attr(not(target_arch = "wasm32"), test)] + #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] + fn test_read_manifest_bytes_from_pdf_with_other_af_relationship_returns_none() { + let mut pdf = Pdf::from_bytes(include_bytes!("../../tests/fixtures/basic.pdf")).unwrap(); + pdf.document + .catalog_mut() + .unwrap() + .set(ASSOCIATED_FILE_KEY, vec![Reference((100, 0))]); + + assert!(matches!(pdf.read_manifest_bytes(), Ok(None))); + } + + #[cfg_attr(not(target_arch = "wasm32"), test)] + #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] + fn test_read_pdf_with_associated_file_that_is_not_manifest() { + let mut pdf = Pdf::from_bytes(include_bytes!("../../tests/fixtures/basic.pdf")).unwrap(); + pdf.document + .catalog_mut() + .unwrap() + .set(ASSOCIATED_FILE_KEY, Reference((100, 0))); + + assert!(matches!(pdf.read_manifest_bytes(), Ok(None))); + } + + #[cfg_attr(not(target_arch = "wasm32"), test)] + #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] + fn test_read_xmp_on_pdf_with_none() { + let pdf = Pdf::from_bytes(include_bytes!("../../tests/fixtures/basic-no-xmp.pdf")).unwrap(); + assert!(pdf.read_xmp().is_none()); + } + + #[cfg_attr(not(target_arch = "wasm32"), test)] + #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] + fn test_read_xmp_on_pdf_with_some_metadata() { + let pdf = Pdf::from_bytes(include_bytes!("../../tests/fixtures/basic.pdf")).unwrap(); + assert!(pdf.read_xmp().is_some()); + } + + #[cfg_attr(not(target_arch = "wasm32"), test)] + #[cfg_attr(target_arch = "wasm32", 
wasm_bindgen_test)] + fn test_remove_manifest_bytes_from_file_without_c2pa_returns_error() { + let mut pdf = Pdf::from_bytes(include_bytes!("../../tests/fixtures/basic.pdf")).unwrap(); + + assert!(matches!( + pdf.remove_manifest_bytes(), + Err(Error::NoManifest) + )); + } + + #[cfg_attr(not(target_arch = "wasm32"), test)] + #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] + fn test_remove_manifest_from_file_with_annotation_based_manifest() { + let mut pdf = Pdf::from_bytes(include_bytes!("../../tests/fixtures/basic.pdf")).unwrap(); + let manifest_bytes = vec![0u8, 1u8, 1u8, 2u8, 3u8]; + pdf.write_manifest_as_annotation(manifest_bytes.clone()) + .unwrap(); + + assert!(pdf.has_c2pa_manifest()); + assert!(pdf.remove_manifest_bytes().is_ok()); + assert!(!pdf.has_c2pa_manifest()); + } + + #[cfg_attr(not(target_arch = "wasm32"), test)] + #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] + fn test_remove_manifest_from_file_with_embedded_file_based_manifest() { + let mut pdf = Pdf::from_bytes(include_bytes!("../../tests/fixtures/basic.pdf")).unwrap(); + let manifest_bytes = vec![0u8, 1u8, 1u8, 2u8, 3u8]; + + pdf.write_manifest_as_embedded_file(manifest_bytes.clone()) + .unwrap(); + + assert!(pdf.has_c2pa_manifest()); + assert!(pdf.remove_manifest_bytes().is_ok()); + assert!(!pdf.has_c2pa_manifest()); + } +} diff --git a/sdk/crates/c2pa-codecs/src/codecs/pdf_io.rs b/sdk/crates/c2pa-codecs/src/codecs/pdf_io.rs new file mode 100644 index 000000000..a56e35fb2 --- /dev/null +++ b/sdk/crates/c2pa-codecs/src/codecs/pdf_io.rs @@ -0,0 +1,241 @@ +// Copyright 2023 Adobe. All rights reserved. +// This file is licensed to you under the Apache License, +// Version 2.0 (http://www.apache.org/licenses/LICENSE-2.0) +// or the MIT license (http://opensource.org/licenses/MIT), +// at your option. 
+ +// Unless required by applicable law or agreed to in writing, +// this software is distributed on an "AS IS" BASIS, WITHOUT +// WARRANTIES OR REPRESENTATIONS OF ANY KIND, either express or +// implied. See the LICENSE-MIT and LICENSE-APACHE files for the +// specific language governing permissions and limitations under +// each license. + +use std::{fs::File, path::Path}; + +use crate::{ + asset_handlers::pdf::{C2paPdf, Pdf}, + asset_io::{AssetIO, CAIRead, CAIReader, CAIWriter, ComposedManifestRef, HashObjectPositions}, + Error, + Error::{JumbfNotFound, NotImplemented, PdfReadError}, +}; + +static SUPPORTED_TYPES: [&str; 2] = ["pdf", "application/pdf"]; +static WRITE_NOT_IMPLEMENTED: &str = "PDF write functionality will be added in a future release"; + +pub struct PdfIO {} + +impl CAIReader for PdfIO { + fn read_cai(&self, asset_reader: &mut dyn CAIRead) -> crate::Result> { + asset_reader.rewind()?; + let pdf = Pdf::from_reader(asset_reader).map_err(|e| Error::InvalidAsset(e.to_string()))?; + self.read_manifest_bytes(pdf) + } + + fn read_xmp(&self, asset_reader: &mut dyn CAIRead) -> Option { + if asset_reader.rewind().is_err() { + return None; + } + + let Ok(pdf) = Pdf::from_reader(asset_reader) else { + return None; + }; + + self.read_xmp_from_pdf(pdf) + } +} + +impl PdfIO { + fn read_manifest_bytes(&self, pdf: impl C2paPdf) -> crate::Result> { + let Ok(result) = pdf.read_manifest_bytes() else { + return Err(PdfReadError); + }; + + let Some(bytes) = result else { + return Err(JumbfNotFound); + }; + + match bytes.as_slice() { + [bytes] => Ok(bytes.to_vec()), + _ => Err(NotImplemented( + "c2pa-rs only supports reading PDFs with one manifest".into(), + )), + } + } + + fn read_xmp_from_pdf(&self, pdf: impl C2paPdf) -> Option { + pdf.read_xmp() + } +} + +impl AssetIO for PdfIO { + fn new(_asset_type: &str) -> Self + where + Self: Sized, + { + Self {} + } + + fn get_handler(&self, asset_type: &str) -> Box { + Box::new(PdfIO::new(asset_type)) + } + + fn 
get_reader(&self) -> &dyn CAIReader { + self + } + + fn get_writer(&self, _asset_type: &str) -> Option> { + None + } + + fn read_cai_store(&self, asset_path: &Path) -> crate::Result> { + let mut f = File::open(asset_path)?; + self.read_cai(&mut f) + } + + fn save_cai_store(&self, _asset_path: &Path, _store_bytes: &[u8]) -> crate::Result<()> { + Err(NotImplemented(WRITE_NOT_IMPLEMENTED.into())) + } + + fn get_object_locations(&self, _asset_path: &Path) -> crate::Result> { + Err(NotImplemented(WRITE_NOT_IMPLEMENTED.into())) + } + + fn remove_cai_store(&self, _asset_path: &Path) -> crate::Result<()> { + Err(NotImplemented(WRITE_NOT_IMPLEMENTED.into())) + } + + fn supported_types(&self) -> &[&str] { + &SUPPORTED_TYPES + } + + fn composed_data_ref(&self) -> Option<&dyn ComposedManifestRef> { + Some(self) + } +} + +impl ComposedManifestRef for PdfIO { + // Return entire CAI block as Vec + fn compose_manifest(&self, manifest_data: &[u8], _format: &str) -> Result, Error> { + Ok(manifest_data.to_vec()) + } +} + +#[cfg(test)] +pub mod tests { + #![allow(clippy::panic)] + #![allow(clippy::unwrap_used)] + + use std::io::Cursor; + + use crate::{ + asset_handlers, + asset_handlers::{pdf::MockC2paPdf, pdf_io::PdfIO}, + asset_io::{AssetIO, CAIReader}, + }; + + static MANIFEST_BYTES: &[u8; 2] = &[10u8, 20u8]; + + #[test] + fn test_error_reading_manifest_fails() { + let mut mock_pdf = MockC2paPdf::default(); + mock_pdf.expect_read_manifest_bytes().returning(|| { + Err(asset_handlers::pdf::Error::UnableToReadPdf( + lopdf::Error::ReferenceLimit, + )) + }); + + let pdf_io = PdfIO::new("pdf"); + assert!(matches!( + pdf_io.read_manifest_bytes(mock_pdf), + Err(crate::Error::PdfReadError) + )) + } + + #[test] + fn test_no_manifest_found_returns_no_jumbf_error() { + let mut mock_pdf = MockC2paPdf::default(); + mock_pdf.expect_read_manifest_bytes().returning(|| Ok(None)); + let pdf_io = PdfIO::new("pdf"); + + assert!(matches!( + pdf_io.read_manifest_bytes(mock_pdf), + 
Err(crate::Error::JumbfNotFound) + )); + } + + #[test] + fn test_one_manifest_found_returns_bytes() { + let mut mock_pdf = MockC2paPdf::default(); + mock_pdf + .expect_read_manifest_bytes() + .returning(|| Ok(Some(vec![MANIFEST_BYTES]))); + + let pdf_io = PdfIO::new("pdf"); + assert_eq!( + pdf_io.read_manifest_bytes(mock_pdf).unwrap(), + MANIFEST_BYTES.to_vec() + ); + } + + #[test] + fn test_multiple_manifest_fail_with_not_implemented_error() { + let mut mock_pdf = MockC2paPdf::default(); + mock_pdf + .expect_read_manifest_bytes() + .returning(|| Ok(Some(vec![MANIFEST_BYTES, MANIFEST_BYTES, MANIFEST_BYTES]))); + + let pdf_io = PdfIO::new("pdf"); + + assert!(matches!( + pdf_io.read_manifest_bytes(mock_pdf), + Err(crate::Error::NotImplemented(_)) + )); + } + + #[test] + fn test_returns_none_when_no_xmp() { + let mut mock_pdf = MockC2paPdf::default(); + mock_pdf.expect_read_xmp().returning(|| None); + + let pdf_io = PdfIO::new("pdf"); + assert!(pdf_io.read_xmp_from_pdf(mock_pdf).is_none()); + } + + #[test] + fn test_returns_some_when_some_xmp() { + let mut mock_pdf = MockC2paPdf::default(); + mock_pdf.expect_read_xmp().returning(|| Some("xmp".into())); + + let pdf_io = PdfIO::new("pdf"); + assert!(pdf_io.read_xmp_from_pdf(mock_pdf).is_some()); + } + + #[test] + fn test_cai_read_finds_no_manifest() { + let source = crate::utils::test::fixture_path("basic.pdf"); + let pdf_io = PdfIO::new("pdf"); + + assert!(matches!( + pdf_io.read_cai_store(&source), + Err(crate::Error::JumbfNotFound) + )); + } + + #[test] + fn test_cai_read_xmp_finds_xmp_data() { + let source = include_bytes!("../../tests/fixtures/basic.pdf"); + let mut stream = Cursor::new(source.to_vec()); + + let pdf_io = PdfIO::new("pdf"); + assert!(pdf_io.read_xmp(&mut stream).is_some()); + } + + #[test] + fn test_read_cai_express_pdf_finds_single_manifest_store() { + let source = include_bytes!("../../tests/fixtures/express-signed.pdf"); + let pdf_io = PdfIO::new("pdf"); + let mut pdf_stream = 
Cursor::new(source.to_vec()); + assert!(pdf_io.read_cai(&mut pdf_stream).is_ok()); + } +} diff --git a/sdk/crates/c2pa-codecs/src/codecs/png_io.rs b/sdk/crates/c2pa-codecs/src/codecs/png_io.rs new file mode 100644 index 000000000..34ebb27e7 --- /dev/null +++ b/sdk/crates/c2pa-codecs/src/codecs/png_io.rs @@ -0,0 +1,1078 @@ +// Copyright 2022 Adobe. All rights reserved. +// This file is licensed to you under the Apache License, +// Version 2.0 (http://www.apache.org/licenses/LICENSE-2.0) +// or the MIT license (http://opensource.org/licenses/MIT), +// at your option. + +// Unless required by applicable law or agreed to in writing, +// this software is distributed on an "AS IS" BASIS, WITHOUT +// WARRANTIES OR REPRESENTATIONS OF ANY KIND, either express or +// implied. See the LICENSE-MIT and LICENSE-APACHE files for the +// specific language governing permissions and limitations under +// each license. + +use std::{ + fs::File, + io::{Cursor, Read, Seek, SeekFrom}, + path::Path, +}; + +use byteorder::{BigEndian, ReadBytesExt}; +use conv::ValueFrom; +use png_pong::chunk::InternationalText; +use serde_bytes::ByteBuf; +use tempfile::Builder; + +use crate::{ + assertions::{BoxMap, C2PA_BOXHASH}, + asset_io::{ + rename_or_move, AssetBoxHash, AssetIO, CAIRead, CAIReadWrite, CAIReader, CAIWriter, + ComposedManifestRef, HashBlockObjectType, HashObjectPositions, RemoteRefEmbed, + RemoteRefEmbedType, + }, + error::{Error, Result}, + utils::xmp_inmemory_utils::{add_provenance, MIN_XMP}, +}; + +const PNG_ID: [u8; 8] = [137, 80, 78, 71, 13, 10, 26, 10]; +const CAI_CHUNK: [u8; 4] = *b"caBX"; +const IMG_HDR: [u8; 4] = *b"IHDR"; +const ITXT_CHUNK: [u8; 4] = *b"iTXt"; +const XMP_KEY: &str = "XML:com.adobe.xmp"; +const PNG_END: [u8; 4] = *b"IEND"; +const PNG_HDR_LEN: u64 = 12; + +static SUPPORTED_TYPES: [&str; 2] = ["png", "image/png"]; + +#[derive(Clone, Debug)] +struct PngChunkPos { + pub start: u64, + pub length: u32, + pub name: [u8; 4], + #[allow(dead_code)] + pub name_str: 
String, +} + +impl PngChunkPos { + pub fn end(&self) -> u64 { + self.start + self.length as u64 + PNG_HDR_LEN + } +} + +fn get_png_chunk_positions(f: &mut R) -> Result> { + let current_len = f.seek(SeekFrom::End(0))?; + let mut chunk_positions: Vec = Vec::new(); + + // move to beginning of file + f.rewind()?; + + let mut buf4 = [0; 4]; + let mut hdr = [0; 8]; + + // check PNG signature + f.read_exact(&mut hdr) + .map_err(|_err| Error::InvalidAsset("PNG invalid".to_string()))?; + if hdr != PNG_ID { + return Err(Error::InvalidAsset("PNG invalid".to_string())); + } + + loop { + let current_pos = f.stream_position()?; + + // read the chunk length + let length = f + .read_u32::() + .map_err(|_err| Error::InvalidAsset("PNG out of range".to_string()))?; + + // read the chunk type + f.read_exact(&mut buf4) + .map_err(|_err| Error::InvalidAsset("PNG out of range".to_string()))?; + let name = buf4; + + // seek past data + f.seek(SeekFrom::Current(length as i64)) + .map_err(|_err| Error::InvalidAsset("PNG out of range".to_string()))?; + + // read crc + f.read_exact(&mut buf4) + .map_err(|_err| Error::InvalidAsset("PNG out of range".to_string()))?; + + let chunk_name = String::from_utf8(name.to_vec()) + .map_err(|_err| Error::InvalidAsset("PNG bad chunk name".to_string()))?; + + let pcp = PngChunkPos { + start: current_pos, + length, + name, + name_str: chunk_name, + }; + + // add to list + chunk_positions.push(pcp); + + // should we break the loop + if name == PNG_END || f.stream_position()? 
> current_len { + break; + } + } + + Ok(chunk_positions) +} + +fn get_cai_data(f: &mut R) -> Result> { + let ps = get_png_chunk_positions(f)?; + + if ps + .clone() + .into_iter() + .filter(|pcp| pcp.name == CAI_CHUNK) + .count() + > 1 + { + return Err(Error::TooManyManifestStores); + } + + let pcp = ps + .into_iter() + .find(|pcp| pcp.name == CAI_CHUNK) + .ok_or(Error::JumbfNotFound)?; + + let length: usize = pcp.length as usize; + + f.seek(SeekFrom::Start(pcp.start + 8))?; // skip ahead from chunk start + length(4) + name(4) + + let mut data: Vec = vec![0; length]; + f.read_exact(&mut data[..]) + .map_err(|_err| Error::InvalidAsset("PNG out of range".to_string()))?; + + Ok(data) +} + +fn add_required_chunks_to_stream( + input_stream: &mut dyn CAIRead, + output_stream: &mut dyn CAIReadWrite, +) -> Result<()> { + let mut buf: Vec = Vec::new(); + input_stream.rewind()?; + input_stream.read_to_end(&mut buf).map_err(Error::IoError)?; + input_stream.rewind()?; + + let img_out = img_parts::DynImage::from_bytes(buf.into()) + .map_err(|_err| Error::InvalidAsset("Could not parse input PNG".to_owned()))?; + + if let Some(img_parts::DynImage::Png(png)) = img_out { + if png.chunk_by_type(CAI_CHUNK).is_none() { + let no_bytes: Vec = Vec::new(); + let aio = PngIO {}; + aio.write_cai(input_stream, output_stream, &no_bytes)?; + } else { + // just clone + input_stream.rewind()?; + output_stream.rewind()?; + std::io::copy(input_stream, output_stream)?; + } + } else { + return Err(Error::UnsupportedType); + } + + Ok(()) +} + +fn read_string(asset_reader: &mut dyn CAIRead, max_read: u32) -> Result { + let mut bytes_read: u32 = 0; + let mut s: Vec = Vec::with_capacity(80); + + loop { + let c = asset_reader.read_u8()?; + if c == 0 { + break; + } + + s.push(c); + + bytes_read += 1; + + if bytes_read == max_read { + break; + } + } + + Ok(String::from_utf8_lossy(&s).to_string()) +} +pub struct PngIO {} + +impl CAIReader for PngIO { + fn read_cai(&self, asset_reader: &mut dyn CAIRead) -> 
Result> { + let cai_data = get_cai_data(asset_reader)?; + Ok(cai_data) + } + + // Get XMP block + fn read_xmp(&self, asset_reader: &mut dyn CAIRead) -> Option { + let ps = get_png_chunk_positions(asset_reader).ok()?; + let mut xmp_str: Option = None; + + ps.into_iter().find(|pcp| { + if pcp.name == ITXT_CHUNK { + // seek to start of chunk + if asset_reader.seek(SeekFrom::Start(pcp.start + 8)).is_err() { + // move +8 to get past header + return false; + } + + // parse the iTxt block + if let Ok(key) = read_string(asset_reader, pcp.length) { + if key.is_empty() || key.len() > 79 { + return false; + } + + // is this an XMP key + if key != XMP_KEY { + return false; + } + + // parse rest of iTxt to get the xmp value + let compressed = match asset_reader.read_u8() { + Ok(c) => c != 0, + Err(_) => return false, + }; + + let _compression_method = match asset_reader.read_u8() { + Ok(c) => c != 0, + Err(_) => return false, + }; + + let _langtag = match read_string(asset_reader, pcp.length) { + Ok(s) => s, + Err(_) => return false, + }; + + let _transkey = match read_string(asset_reader, pcp.length) { + Ok(s) => s, + Err(_) => return false, + }; + + // read iTxt data + let mut data = vec![ + 0u8; + pcp.length as usize + - (key.len() + _langtag.len() + _transkey.len() + 5) + ]; // data len - size of key - size of land - size of transkey - 3 "0" string terminators - compressed u8 - compression method u8 + if asset_reader.read_exact(&mut data).is_err() { + return false; + } + + // convert to string, decompress if needed + let val = if compressed { + /* should not be needed for current XMP + use flate2::read::GzDecoder; + + let cursor = Cursor::new(data); + + let mut d = GzDecoder::new(cursor); + let mut s = String::new(); + if d.read_to_string(&mut s).is_err() { + return false; + } + s + */ + return false; + } else { + String::from_utf8_lossy(&data).to_string() + }; + + xmp_str = Some(val); + + true + } else { + false + } + } else { + false + } + }); + + xmp_str + } +} + +impl 
CAIWriter for PngIO { + fn write_cai( + &self, + input_stream: &mut dyn CAIRead, + output_stream: &mut dyn CAIReadWrite, + store_bytes: &[u8], + ) -> Result<()> { + let mut cai_data = Vec::new(); + let mut cai_encoder = png_pong::Encoder::new(&mut cai_data).into_chunk_enc(); + + let mut png_buf = Vec::new(); + input_stream.rewind()?; + input_stream + .read_to_end(&mut png_buf) + .map_err(Error::IoError)?; + + let mut cursor = Cursor::new(png_buf); + let mut ps = get_png_chunk_positions(&mut cursor)?; + + // get back buffer + png_buf = cursor.into_inner(); + + // create CAI store chunk + let cai_unknown = png_pong::chunk::Unknown { + name: CAI_CHUNK, + data: store_bytes.to_vec(), + }; + + let mut cai_chunk = png_pong::chunk::Chunk::Unknown(cai_unknown); + cai_encoder + .encode(&mut cai_chunk) + .map_err(|_| Error::EmbeddingError)?; + + /* splice in new chunk. Each PNG chunk has the following format: + chunk data length (4 bytes big endian) + chunk identifier (4 byte character sequence) + chunk data (0 - n bytes of chunk data) + chunk crc (4 bytes in crc in format defined in PNG spec) + */ + + // erase existing cai data + let empty_buf = Vec::new(); + let mut iter = ps.into_iter(); + if let Some(existing_cai_data) = iter.find(|png_cp| png_cp.name == CAI_CHUNK) { + // replace existing CAI data + let cai_start = usize::value_from(existing_cai_data.start) + .map_err(|_err| Error::InvalidAsset("value out of range".to_owned()))?; // get beginning of chunk which starts 4 bytes before label + + let cai_end = usize::value_from(existing_cai_data.end()) + .map_err(|_err| Error::InvalidAsset("value out of range".to_owned()))?; + + png_buf.splice(cai_start..cai_end, empty_buf.iter().cloned()); + }; + + // update positions and reset png_buf + cursor = Cursor::new(png_buf); + ps = get_png_chunk_positions(&mut cursor)?; + iter = ps.into_iter(); + png_buf = cursor.into_inner(); + + // add new cai data after the image header chunk + if let Some(img_hdr) = iter.find(|png_cp| 
png_cp.name == IMG_HDR) { + let img_hdr_end = usize::value_from(img_hdr.end()) + .map_err(|_err| Error::InvalidAsset("value out of range".to_owned()))?; + + png_buf.splice(img_hdr_end..img_hdr_end, cai_data.iter().cloned()); + } else { + return Err(Error::EmbeddingError); + } + + output_stream.rewind()?; + output_stream.write_all(&png_buf)?; + + Ok(()) + } + + fn get_object_locations_from_stream( + &self, + input_stream: &mut dyn CAIRead, + ) -> Result> { + let mut positions: Vec = Vec::new(); + + // Ensure the stream has the required chunks so we can generate the required offsets. + let output: Vec = Vec::new(); + let mut output_stream = Cursor::new(output); + + add_required_chunks_to_stream(input_stream, &mut output_stream)?; + + let mut png_buf: Vec = Vec::new(); + output_stream.rewind()?; + output_stream + .read_to_end(&mut png_buf) + .map_err(Error::IoError)?; + output_stream.rewind()?; + + let mut cursor = Cursor::new(png_buf); + let ps = get_png_chunk_positions(&mut cursor)?; + + // get back buffer + png_buf = cursor.into_inner(); + + let pcp = ps + .into_iter() + .find(|pcp| pcp.name == CAI_CHUNK) + .ok_or(Error::JumbfNotFound)?; + + positions.push(HashObjectPositions { + offset: pcp.start as usize, + length: pcp.length as usize + PNG_HDR_LEN as usize, + htype: HashBlockObjectType::Cai, + }); + + // add hash of chunks before cai + positions.push(HashObjectPositions { + offset: 0, + length: pcp.start as usize, + htype: HashBlockObjectType::Other, + }); + + // add position from cai to end + let end = pcp.end() as usize; + let file_end = png_buf.len(); + positions.push(HashObjectPositions { + offset: end, // len of cai + length: file_end - end, + htype: HashBlockObjectType::Other, + }); + + Ok(positions) + } + + fn remove_cai_store_from_stream( + &self, + input_stream: &mut dyn CAIRead, + output_stream: &mut dyn CAIReadWrite, + ) -> Result<()> { + // get png byte + let ps = get_png_chunk_positions(input_stream)?; + + // get image bytes + 
input_stream.rewind()?; + let mut png_buf: Vec = Vec::new(); + input_stream.read_to_end(&mut png_buf)?; + + /* splice in new chunk. Each PNG chunk has the following format: + chunk data length (4 bytes big endian) + chunk identifier (4 byte character sequence) + chunk data (0 - n bytes of chunk data) + chunk crc (4 bytes in crc in format defined in PNG spec) + */ + + // erase existing + let empty_buf = Vec::new(); + let mut iter = ps.into_iter(); + if let Some(existing_cai) = iter.find(|pcp| pcp.name == CAI_CHUNK) { + // replace existing CAI + let start = usize::value_from(existing_cai.start) + .map_err(|_err| Error::InvalidAsset("value out of range".to_string()))?; // get beginning of chunk which starts 4 bytes before label + + let end = usize::value_from(existing_cai.end()) + .map_err(|_err| Error::InvalidAsset("value out of range".to_string()))?; + + png_buf.splice(start..end, empty_buf.iter().cloned()); + } + + // save png data + output_stream.write_all(&png_buf)?; + + Ok(()) + } +} + +impl AssetIO for PngIO { + fn read_cai_store(&self, asset_path: &Path) -> Result> { + let mut f = File::open(asset_path)?; + self.read_cai(&mut f) + } + + fn save_cai_store(&self, asset_path: &Path, store_bytes: &[u8]) -> Result<()> { + let mut stream = std::fs::OpenOptions::new() + .read(true) + .open(asset_path) + .map_err(Error::IoError)?; + + let mut temp_file = Builder::new() + .prefix("c2pa_temp") + .rand_bytes(5) + .tempfile()?; + + self.write_cai(&mut stream, &mut temp_file, store_bytes)?; + + // copy temp file to asset + rename_or_move(temp_file, asset_path) + } + + fn get_object_locations( + &self, + asset_path: &std::path::Path, + ) -> Result> { + let mut file = std::fs::OpenOptions::new() + .read(true) + .write(true) + .open(asset_path) + .map_err(Error::IoError)?; + + self.get_object_locations_from_stream(&mut file) + } + + fn remove_cai_store(&self, asset_path: &Path) -> Result<()> { + // get png byte + let mut png_buf = std::fs::read(asset_path).map_err(|_err| 
Error::EmbeddingError)?; + + let mut cursor = Cursor::new(png_buf); + let ps = get_png_chunk_positions(&mut cursor)?; + + // get back buffer + png_buf = cursor.into_inner(); + + /* splice in new chunk. Each PNG chunk has the following format: + chunk data length (4 bytes big endian) + chunk identifier (4 byte character sequence) + chunk data (0 - n bytes of chunk data) + chunk crc (4 bytes in crc in format defined in PNG spec) + */ + + // erase existing + let empty_buf = Vec::new(); + let mut iter = ps.into_iter(); + if let Some(existing_cai) = iter.find(|pcp| pcp.name == CAI_CHUNK) { + // replace existing CAI + let start = usize::value_from(existing_cai.start) + .map_err(|_err| Error::InvalidAsset("value out of range".to_string()))?; // get beginning of chunk which starts 4 bytes before label + + let end = usize::value_from(existing_cai.end()) + .map_err(|_err| Error::InvalidAsset("value out of range".to_string()))?; + + png_buf.splice(start..end, empty_buf.iter().cloned()); + } + + // save png data + std::fs::write(asset_path, png_buf)?; + + Ok(()) + } + + fn new(_asset_type: &str) -> Self + where + Self: Sized, + { + PngIO {} + } + + fn get_handler(&self, asset_type: &str) -> Box { + Box::new(PngIO::new(asset_type)) + } + + fn get_reader(&self) -> &dyn CAIReader { + self + } + + fn get_writer(&self, asset_type: &str) -> Option> { + Some(Box::new(PngIO::new(asset_type))) + } + + fn remote_ref_writer_ref(&self) -> Option<&dyn RemoteRefEmbed> { + Some(self) + } + + fn asset_box_hash_ref(&self) -> Option<&dyn AssetBoxHash> { + Some(self) + } + + fn composed_data_ref(&self) -> Option<&dyn ComposedManifestRef> { + Some(self) + } + + fn supported_types(&self) -> &[&str] { + &SUPPORTED_TYPES + } +} + +fn get_xmp_insertion_point(asset_reader: &mut dyn CAIRead) -> Option<(u64, u32)> { + let ps = get_png_chunk_positions(asset_reader).ok()?; + + let xmp_box = ps.iter().find(|pcp| { + if pcp.name == ITXT_CHUNK { + // seek to start of chunk + if 
asset_reader.seek(SeekFrom::Start(pcp.start + 8)).is_err() { + // move +8 to get past header + return false; + } + + // parse the iTxt block + if let Ok(key) = read_string(asset_reader, pcp.length) { + if key.is_empty() || key.len() > 79 { + return false; + } + + // is this an XMP key + if key == XMP_KEY { + return true; + } + } + false + } else { + false + } + }); + + if let Some(xmp) = xmp_box { + // overwrite existing box + Some((xmp.start, xmp.length + PNG_HDR_LEN as u32)) + } else { + // insert after IHDR + ps.iter() + .find(|png_cp| png_cp.name == IMG_HDR) + .map(|img_hdr| (img_hdr.end(), 0)) + } +} +impl RemoteRefEmbed for PngIO { + #[allow(unused_variables)] + fn embed_reference(&self, asset_path: &Path, embed_ref: RemoteRefEmbedType) -> Result<()> { + match embed_ref { + crate::asset_io::RemoteRefEmbedType::Xmp(manifest_uri) => { + let output_buf = Vec::new(); + let mut output_stream = Cursor::new(output_buf); + + // do here so source file is closed after update + { + let mut source_stream = std::fs::File::open(asset_path)?; + self.embed_reference_to_stream( + &mut source_stream, + &mut output_stream, + RemoteRefEmbedType::Xmp(manifest_uri), + )?; + } + + std::fs::write(asset_path, output_stream.into_inner())?; + + Ok(()) + } + crate::asset_io::RemoteRefEmbedType::StegoS(_) => Err(Error::UnsupportedType), + crate::asset_io::RemoteRefEmbedType::StegoB(_) => Err(Error::UnsupportedType), + crate::asset_io::RemoteRefEmbedType::Watermark(_) => Err(Error::UnsupportedType), + } + } + + fn embed_reference_to_stream( + &self, + source_stream: &mut dyn CAIRead, + output_stream: &mut dyn CAIReadWrite, + embed_ref: RemoteRefEmbedType, + ) -> Result<()> { + match embed_ref { + crate::asset_io::RemoteRefEmbedType::Xmp(manifest_uri) => { + source_stream.rewind()?; + + let xmp = match self.read_xmp(source_stream) { + Some(s) => s, + None => format!("http://ns.adobe.com/xap/1.0/\0 {}", MIN_XMP), + }; + + // update XMP + let updated_xmp = add_provenance(&xmp, 
&manifest_uri)?; + + // make XMP chunk + let mut xmp_data = Vec::new(); + let mut xmp_encoder = png_pong::Encoder::new(&mut xmp_data).into_chunk_enc(); + + let mut xmp_chunk = png_pong::chunk::Chunk::InternationalText(InternationalText { + key: XMP_KEY.to_string(), + langtag: "".to_string(), + transkey: "".to_string(), + val: updated_xmp, + compressed: false, + }); + xmp_encoder + .encode(&mut xmp_chunk) + .map_err(|_| Error::EmbeddingError)?; + + // patch output stream + let mut png_buf = Vec::new(); + source_stream.rewind()?; + source_stream + .read_to_end(&mut png_buf) + .map_err(Error::IoError)?; + + if let Some((start, xmp_len)) = get_xmp_insertion_point(source_stream) { + let mut png_buf = Vec::new(); + source_stream.rewind()?; + source_stream + .read_to_end(&mut png_buf) + .map_err(Error::IoError)?; + + // replace existing XMP + let xmp_start = usize::value_from(start) + .map_err(|_err| Error::InvalidAsset("value out of range".to_owned()))?; // get beginning of chunk which starts 4 bytes before label + + let xmp_end = usize::value_from(start + xmp_len as u64) + .map_err(|_err| Error::InvalidAsset("value out of range".to_owned()))?; + + png_buf.splice(xmp_start..xmp_end, xmp_data.iter().cloned()); + + output_stream.rewind()?; + output_stream.write_all(&png_buf)?; + + Ok(()) + } else { + Err(Error::EmbeddingError) + } + } + crate::asset_io::RemoteRefEmbedType::StegoS(_) => Err(Error::UnsupportedType), + crate::asset_io::RemoteRefEmbedType::StegoB(_) => Err(Error::UnsupportedType), + crate::asset_io::RemoteRefEmbedType::Watermark(_) => Err(Error::UnsupportedType), + } + } +} + +impl AssetBoxHash for PngIO { + fn get_box_map(&self, input_stream: &mut dyn CAIRead) -> Result> { + input_stream.rewind()?; + + let ps = get_png_chunk_positions(input_stream)?; + + let mut box_maps = Vec::new(); + + // add PNGh header + let pngh_bm = BoxMap { + names: vec!["PNGh".to_string()], + alg: None, + hash: ByteBuf::from(Vec::new()), + pad: ByteBuf::from(Vec::new()), + 
range_start: 0, + range_len: 8, + }; + box_maps.push(pngh_bm); + + // add the other boxes + for pc in ps.into_iter() { + // add special C2PA box + if pc.name == CAI_CHUNK { + let c2pa_bm = BoxMap { + names: vec![C2PA_BOXHASH.to_string()], + alg: None, + hash: ByteBuf::from(Vec::new()), + pad: ByteBuf::from(Vec::new()), + range_start: pc.start as usize, + range_len: (pc.length + 12) as usize, // length(4) + name(4) + crc(4) + }; + box_maps.push(c2pa_bm); + continue; + } + + // all other chunks + let c2pa_bm = BoxMap { + names: vec![pc.name_str], + alg: None, + hash: ByteBuf::from(Vec::new()), + pad: ByteBuf::from(Vec::new()), + range_start: pc.start as usize, + range_len: (pc.length + 12) as usize, // length(4) + name(4) + crc(4) + }; + box_maps.push(c2pa_bm); + } + + Ok(box_maps) + } +} + +impl ComposedManifestRef for PngIO { + fn compose_manifest(&self, manifest_data: &[u8], _format: &str) -> Result> { + let mut cai_data = Vec::new(); + let mut cai_encoder = png_pong::Encoder::new(&mut cai_data).into_chunk_enc(); + + // create CAI store chunk + let cai_unknown = png_pong::chunk::Unknown { + name: CAI_CHUNK, + data: manifest_data.to_vec(), + }; + + let mut cai_chunk = png_pong::chunk::Chunk::Unknown(cai_unknown); + cai_encoder + .encode(&mut cai_chunk) + .map_err(|_| Error::EmbeddingError)?; + + Ok(cai_data) + } +} + +#[cfg(test)] +#[allow(clippy::panic)] +#[allow(clippy::unwrap_used)] +pub mod tests { + use std::io::Write; + + use memchr::memmem; + + use super::*; + use crate::utils::test::{self, temp_dir_path}; + + #[test] + fn test_png_xmp() { + let ap = test::fixture_path("libpng-test_with_url.png"); + + let png_io = PngIO {}; + let xmp = png_io + .read_xmp(&mut std::fs::File::open(ap).unwrap()) + .unwrap(); + + // make sure we can parse it + let provenance = crate::utils::xmp_inmemory_utils::extract_provenance(&xmp).unwrap(); + + assert!(provenance.contains("libpng-test")); + } + + #[test] + fn test_png_xmp_write() { + let ap = 
test::fixture_path("libpng-test.png"); + let mut source_stream = std::fs::File::open(ap).unwrap(); + + let temp_dir = tempfile::tempdir().unwrap(); + let output = temp_dir_path(&temp_dir, "out.png"); + let mut output_stream = std::fs::OpenOptions::new() + .read(true) + .write(true) + .create(true) + .truncate(true) + .open(output) + .unwrap(); + + let png_io = PngIO {}; + //let _orig_xmp = png_io + // .read_xmp(&mut source_stream ) + // .unwrap(); + + // change the xmp + let eh = png_io.remote_ref_writer_ref().unwrap(); + eh.embed_reference_to_stream( + &mut source_stream, + &mut output_stream, + RemoteRefEmbedType::Xmp("some test data".to_string()), + ) + .unwrap(); + + output_stream.rewind().unwrap(); + let new_xmp = png_io.read_xmp(&mut output_stream).unwrap(); + // make sure we can parse it + let provenance = crate::utils::xmp_inmemory_utils::extract_provenance(&new_xmp).unwrap(); + + assert!(provenance.contains("some test data")); + } + + #[test] + fn test_png_parse() { + let ap = test::fixture_path("libpng-test.png"); + + let png_bytes = std::fs::read(&ap).unwrap(); + + // grab PNG chunks and positions + let mut f = std::fs::File::open(ap).unwrap(); + let positions = get_png_chunk_positions(&mut f).unwrap(); + + for hop in positions { + if let Some(start) = memmem::find(&png_bytes, &hop.name) { + if hop.start != (start - 4) as u64 { + panic!("find_bytes found the wrong position"); + // assert!(true); + } + + println!( + "Chunk {} position matches, start: {}, length: {} ", + hop.name_str, hop.start, hop.length + ); + } + } + } + + #[test] + fn test_write_cai_using_stream_existing_cai_data() { + let source = include_bytes!("../../tests/fixtures/exp-test1.png"); + let mut stream = Cursor::new(source.to_vec()); + let png_io = PngIO {}; + + // cai data already exists + assert!(matches!( + png_io.read_cai(&mut stream), + Ok(data) if !data.is_empty(), + )); + + // write new data + let output: Vec = Vec::new(); + let mut output_stream = Cursor::new(output); + + let 
data_to_write: Vec = vec![0, 1, 1, 2, 3, 5, 8, 13, 21, 34]; + assert!(png_io + .write_cai(&mut stream, &mut output_stream, &data_to_write) + .is_ok()); + + // new data replaces the existing cai data + let data_written = png_io.read_cai(&mut output_stream).unwrap(); + assert_eq!(data_to_write, data_written); + } + + #[test] + fn test_write_cai_using_stream_no_cai_data() { + let source = include_bytes!("../../tests/fixtures/libpng-test.png"); + let mut stream = Cursor::new(source.to_vec()); + let png_io = PngIO {}; + + // no cai data present in stream. + assert!(matches!( + png_io.read_cai(&mut stream), + Err(Error::JumbfNotFound) + )); + + // write new data. + let output: Vec = Vec::new(); + let mut output_stream = Cursor::new(output); + + let data_to_write: Vec = vec![0, 1, 1, 2, 3, 5, 8, 13, 21, 34]; + assert!(png_io + .write_cai(&mut stream, &mut output_stream, &data_to_write) + .is_ok()); + + // assert new cai data is present. + let data_written = png_io.read_cai(&mut output_stream).unwrap(); + assert_eq!(data_to_write, data_written); + } + + #[test] + fn test_write_cai_data_to_stream_wrong_format() { + let source = include_bytes!("../../tests/fixtures/C.jpg"); + let mut stream = Cursor::new(source.to_vec()); + let png_io = PngIO {}; + + let output: Vec = Vec::new(); + let mut output_stream = Cursor::new(output); + assert!(matches!( + png_io.write_cai(&mut stream, &mut output_stream, &[]), + Err(Error::InvalidAsset(_),) + )); + } + + #[test] + fn test_stream_object_locations() { + let source = include_bytes!("../../tests/fixtures/exp-test1.png"); + let mut stream = Cursor::new(source.to_vec()); + let png_io = PngIO {}; + let cai_pos = png_io + .get_object_locations_from_stream(&mut stream) + .unwrap() + .into_iter() + .find(|pos| pos.htype == HashBlockObjectType::Cai) + .unwrap(); + + assert_eq!(cai_pos.offset, 33); + assert_eq!(cai_pos.length, 3439701); + } + + #[test] + fn test_stream_object_locations_with_incorrect_file_type() { + let source = 
include_bytes!("../../tests/fixtures/unsupported_type.txt"); + let mut stream = Cursor::new(source.to_vec()); + let png_io = PngIO {}; + assert!(matches!( + png_io.get_object_locations_from_stream(&mut stream), + Err(Error::UnsupportedType) + )); + } + + #[test] + fn test_stream_object_locations_adds_offsets_to_file_without_claims() { + let source = include_bytes!("../../tests/fixtures/libpng-test.png"); + let mut stream = Cursor::new(source.to_vec()); + + let png_io = PngIO {}; + assert!(png_io + .get_object_locations_from_stream(&mut stream) + .unwrap() + .into_iter() + .any(|chunk| chunk.htype == HashBlockObjectType::Cai)); + } + + #[test] + fn test_remove_c2pa() { + let source = test::fixture_path("exp-test1.png"); + let temp_dir = tempfile::tempdir().unwrap(); + let output = test::temp_dir_path(&temp_dir, "exp-test1_tmp.png"); + std::fs::copy(source, &output).unwrap(); + + let png_io = PngIO {}; + png_io.remove_cai_store(&output).unwrap(); + + // read back in asset, JumbfNotFound is expected since it was removed + match png_io.read_cai_store(&output) { + Err(Error::JumbfNotFound) => (), + _ => unreachable!(), + } + } + + #[test] + fn test_remove_c2pa_from_stream() { + let source = crate::utils::test::fixture_path("exp-test1.png"); + + let source_bytes = std::fs::read(source).unwrap(); + let mut source_stream = Cursor::new(source_bytes); + + let png_io = PngIO {}; + let png_writer = png_io.get_writer("png").unwrap(); + + let output_bytes = Vec::new(); + let mut output_stream = Cursor::new(output_bytes); + + png_writer + .remove_cai_store_from_stream(&mut source_stream, &mut output_stream) + .unwrap(); + + // read back in asset, JumbfNotFound is expected since it was removed + let png_reader = png_io.get_reader(); + match png_reader.read_cai(&mut output_stream) { + Err(Error::JumbfNotFound) => (), + _ => unreachable!(), + } + } + + #[test] + fn test_embeddable_manifest() { + let png_io = PngIO {}; + + let source = 
crate::utils::test::fixture_path("exp-test1.png"); + + let ol = png_io.get_object_locations(&source).unwrap(); + + let cai_loc = ol + .iter() + .find(|o| o.htype == HashBlockObjectType::Cai) + .unwrap(); + let curr_manifest = png_io.read_cai_store(&source).unwrap(); + + let temp_dir = tempfile::tempdir().unwrap(); + let output = crate::utils::test::temp_dir_path(&temp_dir, "exp-test1-out.png"); + + std::fs::copy(source, &output).unwrap(); + + // remove existing + png_io.remove_cai_store(&output).unwrap(); + + // generate new manifest data + let em = png_io + .composed_data_ref() + .unwrap() + .compose_manifest(&curr_manifest, "png") + .unwrap(); + + // insert new manifest + let outbuf = Vec::new(); + let mut out_stream = Cursor::new(outbuf); + + let mut before = vec![0u8; cai_loc.offset]; + let mut in_file = std::fs::File::open(&output).unwrap(); + + // write before + in_file.read_exact(before.as_mut_slice()).unwrap(); + out_stream.write_all(&before).unwrap(); + + // write composed bytes + out_stream.write_all(&em).unwrap(); + + // write bytes after + let mut after_buf = Vec::new(); + in_file.read_to_end(&mut after_buf).unwrap(); + out_stream.write_all(&after_buf).unwrap(); + + // read manifest back in from new in-memory PNG + out_stream.rewind().unwrap(); + let restored_manifest = png_io.read_cai(&mut out_stream).unwrap(); + + assert_eq!(&curr_manifest, &restored_manifest); + } +} diff --git a/sdk/crates/c2pa-codecs/src/codecs/riff_io.rs b/sdk/crates/c2pa-codecs/src/codecs/riff_io.rs new file mode 100644 index 000000000..afd497033 --- /dev/null +++ b/sdk/crates/c2pa-codecs/src/codecs/riff_io.rs @@ -0,0 +1,864 @@ +// Copyright 2023 Adobe. All rights reserved. +// This file is licensed to you under the Apache License, +// Version 2.0 (http://www.apache.org/licenses/LICENSE-2.0) +// or the MIT license (http://opensource.org/licenses/MIT), +// at your option. 
/// Drains `iter` and returns every remaining item, in iteration order.
///
/// The iterator is taken by mutable reference so that callers can pass a
/// temporary directly, e.g. `read_items(&mut chunk.iter(stream))`.
fn read_items<T>(iter: &mut T) -> Vec<T::Item>
where
    T: Iterator,
{
    iter.collect()
}
Some(ChunkContents::Data(_id, chunk_data)) = chunk_contents.iter().find(|c| match c { + ChunkContents::Data(id, _) => *id == VP8L_ID, + _ => false, + }) { + let mut chunk_stream = Cursor::new(chunk_data); + chunk_stream.seek(SeekFrom::Start(1))?; // skip signature byte + + // width and length are 12 bits packed together + let first_bytes = chunk_stream.read_u16::()?; + let width = 1 + (first_bytes & 0x3fff); // add 1 for VP8L + let last_two = (first_bytes & 0xc000) >> 14; // last two bits of first bytes are first 2 of height + let height = 1 + (((chunk_stream.read_u16::()? & 0xfff) << 2) | last_two); + + return Ok((height, width)); + } + + if let Some(ChunkContents::Data(_id, chunk_data)) = chunk_contents.iter().find(|c| match c { + ChunkContents::Data(id, _) => *id == VP8_ID, + _ => false, + }) { + let mut chunk_stream = Cursor::new(chunk_data); + chunk_stream.seek(SeekFrom::Start(6))?; // skip frame tag and start code + + let width = chunk_stream.read_u16::()? & 0x3fff; + let height = chunk_stream.read_u16::()? 
& 0x3fff; + + return Ok((height, width)); + } + + Err(Error::InvalidAsset( + "WEBP missing VP8 or VP8L segment".to_string(), + )) +} + +fn inject_c2pa( + chunk: &Chunk, + stream: &mut T, + data: &[u8], + xmp_data: Option<&[u8]>, + format: &str, +) -> Result +where + T: std::io::Seek + std::io::Read, +{ + let id = chunk.id(); + let is_riff_chunk: bool = id == riff::RIFF_ID; + stream.rewind()?; + + if is_riff_chunk || id == riff::LIST_ID { + let chunk_type = chunk.read_type(stream).map_err(|_| { + Error::InvalidAsset("RIFF handler could not parse file format {format}".to_string()) + })?; + let mut children = read_items(&mut chunk.iter(stream)); + let mut children_contents: Vec = Vec::new(); + + if is_riff_chunk && !data.is_empty() { + // remove c2pa manifest store in RIFF chunk + children.retain(|c| c.id() != C2PA_CHUNK_ID); + } + + if is_riff_chunk && xmp_data.is_some() { + // remove XMP in RIFF chunk so we can replace + children.retain(|c| c.id() != XMP_CHUNK_ID); + } + + // duplicate all top level children + for child in children { + children_contents.push(inject_c2pa(&child, stream, data, xmp_data, format)?); + } + + // add XMP if needed + if let Some(xmp) = xmp_data { + if is_riff_chunk && !xmp.is_empty() { + // if this is a webp doc we must also update VP8X + if format == "webp" { + // if already present we can patch otherwise add + if let Some(ChunkContents::Data(_id, chunk_data)) = + children_contents.iter_mut().find(|c| match c { + ChunkContents::Data(id, _) => *id == VP8X_ID, + _ => false, + }) + { + let mut chunk_stream = Cursor::new(chunk_data); + + let mut flags = chunk_stream.read_u32::()?; + + // add in XMP flag + flags |= XMP_FLAG; + + chunk_stream.rewind()?; + + // write back changes + chunk_stream.write_u32::(flags)?; + } else { + // add new VP8X + + // get height and width from VBL + if let Ok((height, width)) = get_height_and_width(&children_contents) { + let data: Vec = Vec::new(); + let mut chunk_writer = Cursor::new(data); + + let flags: u32 = 
XMP_FLAG; + let vp8x_height = height as u32 - 1; + let vp8x_width = width as u32 - 1; + + // write flags + chunk_writer.write_u32::(flags)?; + + // write width then height + chunk_writer.write_u24::(vp8x_width)?; + chunk_writer.write_u24::(vp8x_height)?; + + // make new VP8X chunk and prepend to children list + let mut tmp_vec: Vec = Vec::new(); + tmp_vec.push(ChunkContents::Data(VP8X_ID, chunk_writer.into_inner())); + tmp_vec.extend(children_contents); + children_contents = tmp_vec; + } else { + return Err(Error::InvalidAsset( + "Could not parse VP8 or VP8L".to_string(), + )); + } + } + } + + children_contents.push(ChunkContents::Data(XMP_CHUNK_ID, xmp.to_vec())); + } + } + + // place at the end for maximum compatibility + if is_riff_chunk && !data.is_empty() { + children_contents.push(ChunkContents::Data(C2PA_CHUNK_ID, data.to_vec())); + } + + Ok(ChunkContents::Children(id, chunk_type, children_contents)) + } else if id == riff::SEQT_ID { + let children = read_items(&mut chunk.iter_no_type(stream)); + let mut children_contents: Vec = Vec::new(); + + for child in children { + children_contents.push(inject_c2pa(&child, stream, data, xmp_data, format)?); + } + + Ok(ChunkContents::ChildrenNoType(id, children_contents)) + } else { + let contents = chunk + .read_contents(stream) + .map_err(|_| Error::InvalidAsset("RIFF handler could not parse file".to_string()))?; + Ok(ChunkContents::Data(id, contents)) + } +} + +fn get_manifest_pos(reader: &mut dyn CAIRead) -> Option<(u64, u32)> { + let mut asset: Vec = Vec::new(); + reader.rewind().ok()?; + reader.read_to_end(&mut asset).ok()?; + + let mut chunk_reader = Cursor::new(asset); + + let top_level_chunks = riff::Chunk::read(&mut chunk_reader, 0).ok()?; + + if top_level_chunks.id() == RIFF_ID { + for c in top_level_chunks.iter(&mut chunk_reader) { + if c.id() == C2PA_CHUNK_ID { + return Some((c.offset(), c.len() + 8)); // 8 is len of data chunk header + } + } + } + None +} + +impl CAIReader for RiffIO { + fn read_cai(&self, 
input_stream: &mut dyn CAIRead) -> Result> { + let mut chunk_reader = CAIReadWrapper { + reader: input_stream, + }; + + let top_level_chunks = riff::Chunk::read(&mut chunk_reader, 0)?; + + if top_level_chunks.id() != RIFF_ID { + return Err(Error::InvalidAsset("Invalid RIFF format".to_string())); + } + + for c in top_level_chunks.iter(&mut chunk_reader) { + if c.id() == C2PA_CHUNK_ID { + return Ok(c.read_contents(&mut chunk_reader)?); + } + } + + Err(Error::JumbfNotFound) + } + + // Get XMP block + fn read_xmp(&self, input_stream: &mut dyn CAIRead) -> Option { + let top_level_chunks = { + let mut reader = CAIReadWrapper { + reader: input_stream, + }; + Chunk::read(&mut reader, 0).ok()? + }; + + if top_level_chunks.id() != RIFF_ID { + return None; + } + + let mut chunk_reader = CAIReadWrapper { + reader: input_stream, + }; + + for c in top_level_chunks.iter(&mut chunk_reader) { + if c.id() == XMP_CHUNK_ID { + let output = c.read_contents(&mut chunk_reader).ok()?; + let output_string = String::from_utf8_lossy(&output); + + return Some(output_string.to_string()); + } + } + + None + } +} + +fn add_required_chunks( + asset_type: &str, + input_stream: &mut dyn CAIRead, + output_stream: &mut dyn CAIReadWrite, +) -> Result<()> { + let aio = RiffIO::new(asset_type); + + match aio.read_cai(input_stream) { + Ok(_) => { + // just clone + input_stream.rewind()?; + output_stream.rewind()?; + std::io::copy(input_stream, output_stream)?; + Ok(()) + } + Err(_) => { + input_stream.rewind()?; + aio.write_cai(input_stream, output_stream, &[1, 2, 3, 4]) // save arbitrary data + } + } +} + +impl AssetIO for RiffIO { + fn new(riff_format: &str) -> Self { + RiffIO { + riff_format: riff_format.to_string(), + } + } + + fn get_handler(&self, asset_type: &str) -> Box { + Box::new(RiffIO::new(asset_type)) + } + + fn get_reader(&self) -> &dyn CAIReader { + self + } + + fn get_writer(&self, asset_type: &str) -> Option> { + Some(Box::new(RiffIO::new(asset_type))) + } + + fn asset_patch_ref(&self) 
-> Option<&dyn AssetPatch> { + Some(self) + } + + fn read_cai_store(&self, asset_path: &Path) -> Result> { + let mut f = File::open(asset_path)?; + self.read_cai(&mut f) + } + + fn save_cai_store(&self, asset_path: &std::path::Path, store_bytes: &[u8]) -> Result<()> { + let mut input_stream = File::open(asset_path)?; + + let mut temp_file = Builder::new() + .prefix("c2pa_temp") + .rand_bytes(5) + .tempfile()?; + + self.write_cai(&mut input_stream, &mut temp_file, store_bytes)?; + + // copy temp file to asset + rename_or_move(temp_file, asset_path) + } + + fn get_object_locations( + &self, + asset_path: &std::path::Path, + ) -> Result> { + let mut f = std::fs::File::open(asset_path).map_err(|_err| Error::EmbeddingError)?; + + self.get_object_locations_from_stream(&mut f) + } + + fn remove_cai_store(&self, asset_path: &Path) -> Result<()> { + self.save_cai_store(asset_path, &[]) + } + + fn remote_ref_writer_ref(&self) -> Option<&dyn RemoteRefEmbed> { + Some(self) + } + + fn supported_types(&self) -> &[&str] { + &SUPPORTED_TYPES + } +} + +impl CAIWriter for RiffIO { + fn write_cai( + &self, + input_stream: &mut dyn CAIRead, + output_stream: &mut dyn CAIReadWrite, + store_bytes: &[u8], + ) -> Result<()> { + let top_level_chunks = { + let mut reader = CAIReadWrapper { + reader: input_stream, + }; + Chunk::read(&mut reader, 0)? 
+ }; + + if top_level_chunks.id() != RIFF_ID { + return Err(Error::InvalidAsset("Invalid RIFF format".to_string())); + } + + let mut reader = CAIReadWrapper { + reader: input_stream, + }; + + // replace/add manifest in memory + let new_contents = inject_c2pa( + &top_level_chunks, + &mut reader, + store_bytes, + None, + &self.riff_format, + )?; + + let mut writer = CAIReadWriteWrapper { + reader_writer: output_stream, + }; + + // save contents + new_contents + .write(&mut writer) + .map_err(|_e| Error::EmbeddingError)?; + Ok(()) + } + + fn get_object_locations_from_stream( + &self, + input_stream: &mut dyn CAIRead, + ) -> Result> { + let output_buf: Vec = Vec::new(); + let mut output_stream = Cursor::new(output_buf); + + add_required_chunks(&self.riff_format, input_stream, &mut output_stream)?; + + let mut positions: Vec = Vec::new(); + + let (manifest_pos, manifest_len) = + get_manifest_pos(&mut output_stream).ok_or(Error::EmbeddingError)?; + + positions.push(HashObjectPositions { + offset: usize::value_from(manifest_pos) + .map_err(|_err| Error::InvalidAsset("value out of range".to_string()))?, + length: usize::value_from(manifest_len) + .map_err(|_err| Error::InvalidAsset("value out of range".to_string()))?, + htype: HashBlockObjectType::Cai, + }); + + // add hash of chunks before cai + positions.push(HashObjectPositions { + offset: 0, + length: usize::value_from(manifest_pos) + .map_err(|_err| Error::InvalidAsset("value out of range".to_string()))?, + htype: HashBlockObjectType::Other, + }); + + // add position from cai to end + let end = u64::value_from(manifest_pos) + .map_err(|_err| Error::InvalidAsset("value out of range".to_string()))? 
+ + u64::value_from(manifest_len) + .map_err(|_err| Error::InvalidAsset("value out of range".to_string()))?; + let file_end = output_stream.seek(SeekFrom::End(0))?; + positions.push(HashObjectPositions { + offset: usize::value_from(end) + .map_err(|_err| Error::InvalidAsset("value out of range".to_string()))?, // len of cai + length: usize::value_from(file_end - end) + .map_err(|_err| Error::InvalidAsset("value out of range".to_string()))?, + htype: HashBlockObjectType::Other, + }); + + Ok(positions) + } + + fn remove_cai_store_from_stream( + &self, + input_stream: &mut dyn CAIRead, + output_stream: &mut dyn CAIReadWrite, + ) -> Result<()> { + self.write_cai(input_stream, output_stream, &[]) + } +} + +impl AssetPatch for RiffIO { + fn patch_cai_store(&self, asset_path: &std::path::Path, store_bytes: &[u8]) -> Result<()> { + let mut asset = OpenOptions::new() + .write(true) + .read(true) + .create(false) + .open(asset_path)?; + + let (manifest_pos, manifest_len) = + get_manifest_pos(&mut asset).ok_or(Error::EmbeddingError)?; + + if store_bytes.len() + 8 == manifest_len as usize { + asset.seek(SeekFrom::Start(manifest_pos + 8))?; // skip 8 byte chunk data header + asset.write_all(store_bytes)?; + Ok(()) + } else { + Err(Error::InvalidAsset( + "patch_cai_store store size mismatch.".to_string(), + )) + } + } +} + +impl RemoteRefEmbed for RiffIO { + #[allow(unused_variables)] + fn embed_reference( + &self, + asset_path: &Path, + embed_ref: crate::asset_io::RemoteRefEmbedType, + ) -> Result<()> { + let mut input_stream = File::open(asset_path)?; + + let mut output_stream = std::fs::OpenOptions::new() + .read(true) + .write(true) + .open(asset_path) + .map_err(Error::IoError)?; + + self.embed_reference_to_stream(&mut input_stream, &mut output_stream, embed_ref) + } + + fn embed_reference_to_stream( + &self, + input_stream: &mut dyn CAIRead, + output_stream: &mut dyn CAIReadWrite, + embed_ref: RemoteRefEmbedType, + ) -> Result<()> { + match embed_ref { + 
crate::asset_io::RemoteRefEmbedType::Xmp(manifest_uri) => { + match self.riff_format.as_ref() { + "avi" | "wav" | "webp" => { + if let Some(curr_xmp) = self.read_xmp(input_stream) { + let mut new_xmp = add_provenance(&curr_xmp, &manifest_uri)?; + if new_xmp.len() % 2 == 1 { + // pad if needed to even length + new_xmp.push(' '); + } + + let top_level_chunks = { + let mut reader = CAIReadWrapper { + reader: input_stream, + }; + Chunk::read(&mut reader, 0)? + }; + + if top_level_chunks.id() != RIFF_ID { + return Err(Error::InvalidAsset("Invalid RIFF format".to_string())); + } + + let mut reader = CAIReadWrapper { + reader: input_stream, + }; + + // replace/add manifest in memory + let new_contents = inject_c2pa( + &top_level_chunks, + &mut reader, + &[], + Some(new_xmp.as_bytes()), + &self.riff_format, + )?; + + // save contents + let mut writer = CAIReadWriteWrapper { + reader_writer: output_stream, + }; + new_contents + .write(&mut writer) + .map_err(|_e| Error::EmbeddingError)?; + Ok(()) + } else { + let mut new_xmp = add_provenance(MIN_XMP, &manifest_uri)?; + + if new_xmp.len() % 2 == 1 { + // pad if needed to even length + new_xmp.push(' '); + } + + let top_level_chunks = { + let mut reader = CAIReadWrapper { + reader: input_stream, + }; + Chunk::read(&mut reader, 0)? 
+ }; + + if top_level_chunks.id() != RIFF_ID { + return Err(Error::InvalidAsset("Invalid RIFF format".to_string())); + } + + let mut reader = CAIReadWrapper { + reader: input_stream, + }; + + // replace/add manifest in memory + let new_contents = inject_c2pa( + &top_level_chunks, + &mut reader, + &[], + Some(new_xmp.as_bytes()), + &self.riff_format, + )?; + + // save contents + let mut writer = CAIReadWriteWrapper { + reader_writer: output_stream, + }; + new_contents + .write(&mut writer) + .map_err(|_e| Error::EmbeddingError)?; + Ok(()) + } + } + _ => Err(Error::UnsupportedType), + } + } + crate::asset_io::RemoteRefEmbedType::StegoS(_) => Err(Error::UnsupportedType), + crate::asset_io::RemoteRefEmbedType::StegoB(_) => Err(Error::UnsupportedType), + crate::asset_io::RemoteRefEmbedType::Watermark(_) => Err(Error::UnsupportedType), + } + } +} + +#[cfg(test)] +pub mod tests { + #![allow(clippy::expect_used)] + #![allow(clippy::panic)] + #![allow(clippy::unwrap_used)] + + use tempfile::tempdir; + + use super::*; + use crate::utils::{ + hash_utils::vec_compare, + test::{fixture_path, temp_dir_path}, + xmp_inmemory_utils::extract_provenance, + }; + + #[test] + fn test_write_wav() { + let more_data = "some more test data".as_bytes(); + let source = fixture_path("sample1.wav"); + + let mut success = false; + if let Ok(temp_dir) = tempdir() { + let output = temp_dir_path(&temp_dir, "sample1-wav.wav"); + + if let Ok(_size) = std::fs::copy(source, &output) { + let riff_io = RiffIO::new("wav"); + + if let Ok(()) = riff_io.save_cai_store(&output, more_data) { + if let Ok(read_test_data) = riff_io.read_cai_store(&output) { + assert!(vec_compare(more_data, &read_test_data)); + success = true; + } + } + } + } + assert!(success) + } + + #[test] + fn test_write_wav_stream() { + let more_data = "some more test data".as_bytes(); + let mut source = File::open(fixture_path("sample1.wav")).unwrap(); + + let riff_io = RiffIO::new("wav"); + if let Ok(temp_dir) = tempdir() { + let output = 
temp_dir_path(&temp_dir, "sample1-wav.wav"); + + let mut output_stream = File::create(&output).unwrap(); + + riff_io + .write_cai(&mut source, &mut output_stream, more_data) + .unwrap(); + + let mut source = File::open(output).unwrap(); + let read_test_data = riff_io.read_cai(&mut source).unwrap(); + assert!(vec_compare(more_data, &read_test_data)); + } + } + + #[test] + fn test_patch_write_wav() { + let test_data = "some test data".as_bytes(); + let source = fixture_path("sample1.wav"); + + let mut success = false; + if let Ok(temp_dir) = tempdir() { + let output = temp_dir_path(&temp_dir, "sample1-wav.wav"); + + if let Ok(_size) = std::fs::copy(source, &output) { + let riff_io = RiffIO::new("wav"); + + if let Ok(()) = riff_io.save_cai_store(&output, test_data) { + if let Ok(source_data) = riff_io.read_cai_store(&output) { + // create replacement data of same size + let mut new_data = vec![0u8; source_data.len()]; + new_data[..test_data.len()].copy_from_slice(test_data); + riff_io.patch_cai_store(&output, &new_data).unwrap(); + + let replaced = riff_io.read_cai_store(&output).unwrap(); + + assert_eq!(new_data, replaced); + + success = true; + } + } + } + } + assert!(success) + } + + #[test] + fn test_remove_c2pa() { + let source = fixture_path("sample1.wav"); + + let temp_dir = tempdir().unwrap(); + let output = temp_dir_path(&temp_dir, "sample1-wav.wav"); + + std::fs::copy(source, &output).unwrap(); + let riff_io = RiffIO::new("wav"); + + riff_io.remove_cai_store(&output).unwrap(); + + // read back in asset, JumbfNotFound is expected since it was removed + match riff_io.read_cai_store(&output) { + Err(Error::JumbfNotFound) => (), + _ => unreachable!(), + } + } + + #[test] + fn test_read_xmp() { + let source = fixture_path("test_xmp.webp"); + let mut reader = std::fs::File::open(source).unwrap(); + + let riff_io = RiffIO::new("webp"); + + let xmp = riff_io.read_xmp(&mut reader).unwrap(); + println!("XMP: {xmp}"); + } + + #[test] + fn test_write_xmp() { + let 
more_data = "some more test data"; + let source = fixture_path("test_xmp.webp"); + + let mut success = false; + if let Ok(temp_dir) = tempdir() { + let output = temp_dir_path(&temp_dir, "test_xmp.webp"); + + std::fs::copy(source, &output).unwrap(); + + let riff_io = RiffIO::new("webp"); + + if let Some(embed_handler) = riff_io.remote_ref_writer_ref() { + if let Ok(()) = embed_handler.embed_reference( + output.as_path(), + RemoteRefEmbedType::Xmp(more_data.to_string()), + ) { + let mut output_stream = std::fs::File::open(&output).unwrap(); + + // check the xmp + if let Some(xmp) = riff_io.read_xmp(&mut output_stream) { + println!("XMP: {xmp}"); + + if let Some(xmp_val) = extract_provenance(&xmp) { + if xmp_val == more_data { + success = true; + } + } + } + } + } + } + assert!(success) + } + + #[test] + fn test_insert_xmp() { + let more_data = "some more test data"; + let source = fixture_path("test.webp"); + + let mut success = false; + if let Ok(temp_dir) = tempdir() { + let output = temp_dir_path(&temp_dir, "test.webp"); + + std::fs::copy(source, &output).unwrap(); + + let riff_io = RiffIO::new("webp"); + + if let Some(embed_handler) = riff_io.remote_ref_writer_ref() { + if let Ok(()) = embed_handler.embed_reference( + output.as_path(), + RemoteRefEmbedType::Xmp(more_data.to_string()), + ) { + let mut output_stream = std::fs::File::open(&output).unwrap(); + + // check the xmp + if let Some(xmp) = riff_io.read_xmp(&mut output_stream) { + println!("XMP: {xmp}"); + + if let Some(xmp_val) = extract_provenance(&xmp) { + if xmp_val == more_data { + success = true; + } + } + } + } + } + } + assert!(success) + } + + #[test] + fn test_insert_xmp_lossless() { + let more_data = "some more test data"; + let source = fixture_path("test_lossless.webp"); + + let mut success = false; + if let Ok(temp_dir) = tempdir() { + let output = temp_dir_path(&temp_dir, "test_lossless.webp"); + + std::fs::copy(source, &output).unwrap(); + + let riff_io = RiffIO::new("webp"); + + if let 
Some(embed_handler) = riff_io.remote_ref_writer_ref() { + if let Ok(()) = embed_handler.embed_reference( + output.as_path(), + RemoteRefEmbedType::Xmp(more_data.to_string()), + ) { + let mut output_stream = std::fs::File::open(&output).unwrap(); + + // check the xmp + if let Some(xmp) = riff_io.read_xmp(&mut output_stream) { + println!("XMP: {xmp}"); + + if let Some(xmp_val) = extract_provenance(&xmp) { + if xmp_val == more_data { + success = true; + } + } + } + } + } + } + assert!(success) + } +} diff --git a/sdk/crates/c2pa-codecs/src/codecs/svg.rs b/sdk/crates/c2pa-codecs/src/codecs/svg.rs new file mode 100644 index 000000000..68a8b6858 --- /dev/null +++ b/sdk/crates/c2pa-codecs/src/codecs/svg.rs @@ -0,0 +1,785 @@ +// Copyright 2023 Adobe. All rights reserved. +// This file is licensed to you under the Apache License, +// Version 2.0 (http://www.apache.org/licenses/LICENSE-2.0) +// or the MIT license (http://opensource.org/licenses/MIT), +// at your option. + +// Unless required by applicable law or agreed to in writing, +// this software is distributed on an "AS IS" BASIS, WITHOUT +// WARRANTIES OR REPRESENTATIONS OF ANY KIND, either express or +// implied. See the LICENSE-MIT and LICENSE-APACHE files for the +// specific language governing permissions and limitations under +// each license. 
+ +use std::io::{BufReader, Cursor, Read, Seek, SeekFrom, Write}; + +use fast_xml::{ + events::{BytesText, Event}, + Reader, Writer, +}; + +use crate::{ + ByteSpan, C2paSpan, CodecError, Decode, DefaultSpan, Embed, Embeddable, Encode, EncodeInPlace, + Span, Support, +}; + +const SVG: &str = "svg"; +const METADATA: &str = "metadata"; +const MANIFEST: &str = "c2pa:manifest"; +const MANIFEST_NS: &str = "xmlns:c2pa"; +const MANIFEST_NS_VAL: &str = "http://c2pa.org/manifest"; + +#[derive(Debug)] +pub struct SvgCodec { + src: R, +} + +impl SvgCodec { + pub fn new(src: R) -> Self { + Self { src } + } +} + +impl Support for SvgCodec<()> { + const MAX_SIGNATURE_LEN: usize = 0; + + // TODO: does this impl cover all cases? it should also run last due to the computation + // we can probably also add a short circuit type of method, where if the first few bytes + // aren't xml it isn't an svg + // TODO: we also need to reset the stream to the first x bytes when this returns + fn supports_stream(src: impl Read + Seek) -> Result { + let mut src = BufReader::new(src); + let mut reader = Reader::from_reader(&mut src); + + let mut event = Vec::new(); + loop { + match reader.read_event(&mut event) { + Ok(Event::Start(ref e)) => { + if e.name() == SVG.as_bytes() { + return Ok(true); + } + } + Ok(Event::Eof) | Err(_) => break, + _ => {} + } + + event.clear(); + } + + Ok(false) + } + + fn supports_extension(extension: &str) -> bool { + matches!(extension, "svg" | "xhtml" | "xml") + } + + fn supports_mime(mime: &str) -> bool { + matches!( + mime, + "application/svg+xml" + | "application/xhtml+xml" + | "application/xml" + | "image/svg+xml" + | "text/xml" + ) + } +} + +impl Embed for SvgCodec { + fn embeddable(bytes: &[u8]) -> Result { + todo!() + } + + fn embed(&mut self, embeddable: Embeddable, dst: impl Write) -> Result<(), CodecError> { + todo!() + } +} + +impl Decode for SvgCodec { + fn read_c2pa(&mut self) -> Result>, CodecError> { + let (decoded_manifest_opt, _detected_tag_location, 
_insertion_point) = + detect_manifest_location(&mut self.src)?; + + match decoded_manifest_opt { + Some(decoded_manifest) => { + if !decoded_manifest.is_empty() { + Ok(Some(decoded_manifest)) + } else { + Ok(None) + } + } + None => Ok(None), + } + } +} + +// create manifest entry +fn create_manifest_tag(data: &[u8], with_meta: bool) -> Result, CodecError> { + let mut output: Vec = Vec::with_capacity(data.len() + 256); + let mut writer = Writer::new(Cursor::new(output)); + + let encoded = base64::encode(data); + + if with_meta { + writer + .create_element(METADATA) + .write_inner_content(|writer| { + writer + .create_element(MANIFEST) + .with_attribute((MANIFEST_NS, MANIFEST_NS_VAL)) + .write_text_content(BytesText::from_plain_str(&encoded))?; + Ok(()) + }) + .map_err(|err| CodecError::InvalidAsset { + src: Some(err.to_string()), + context: "failed to create manifest tag with metadata".to_owned(), + })?; + } else { + writer + .create_element(MANIFEST) + .with_attribute((MANIFEST_NS, MANIFEST_NS_VAL)) + .write_text_content(BytesText::from_plain_str(&encoded)) + .map_err(|err| CodecError::InvalidAsset { + src: Some(err.to_string()), + context: "failed to create manifest tag".to_string(), + })?; + } + + output = writer.into_inner().into_inner(); + + Ok(output) +} + +enum DetectedTagsDepth { + Metadata, + Manifest, + Empty, +} + +// returns tuple of found manifest, where in the XML hierarchy the manifest needs to go, and the manifest insertion point +fn detect_manifest_location( + mut src: impl Read + Seek, +) -> Result<(Option>, DetectedTagsDepth, usize), CodecError> { + src.rewind()?; + + let mut buf = Vec::new(); + + let buf_reader = BufReader::new(&mut src); + + // TODO: quickxml doesn't require an internal bufreader + let mut xml_reader = Reader::from_reader(buf_reader); + + let mut xml_path: Vec = Vec::new(); + + let mut detected_level = DetectedTagsDepth::Empty; + let mut insertion_point = 0; + + let mut output: Option> = None; + + loop { + match 
xml_reader.read_event(&mut buf) { + Ok(Event::Start(ref e)) => { + let name = String::from_utf8_lossy(e.name()).into_owned(); + xml_path.push(name); + + if xml_path.len() == 2 && xml_path[0] == SVG && xml_path[1] == METADATA { + detected_level = DetectedTagsDepth::Metadata; + insertion_point = xml_reader.buffer_position(); + } + + if xml_path.len() == 3 + && xml_path[0] == SVG + && xml_path[1] == METADATA + && xml_path[2] == MANIFEST + { + detected_level = DetectedTagsDepth::Manifest; + insertion_point = xml_reader.buffer_position(); + + let mut temp_buf = Vec::new(); + let s = xml_reader + .read_text(e.name(), &mut temp_buf) + .map_err(|err| CodecError::InvalidAsset { + src: Some(err.to_string()), + context: "XML manifest tag invalid content".to_string(), + })?; + + output = Some(base64::decode(&s).map_err(|err| CodecError::InvalidAsset { + src: Some(err.to_string()), + context: "XML bad base64 encoding".to_string(), + })?); + } + + if xml_path.len() == 1 && xml_path[0] == SVG { + detected_level = DetectedTagsDepth::Empty; + insertion_point = xml_reader.buffer_position(); + } + } + Ok(Event::End(_)) => { + let _p = xml_path.pop(); + } + Ok(Event::Eof) => break, + Err(err) => { + return Err(CodecError::InvalidAsset { + src: Some(err.to_string()), + context: "XML invalid".to_string(), + }) + } + _ => (), + } + } + + Ok((output, detected_level, insertion_point)) +} + +fn add_required_segs_to_stream( + mut src: impl Read + Seek, + mut dst: impl Write + Seek, +) -> Result<(), CodecError> { + let (encoded_manifest_opt, _detected_tag_location, _insertion_point) = + detect_manifest_location(&mut src)?; + + let need_manifest = if let Some(encoded_manifest) = encoded_manifest_opt { + encoded_manifest.is_empty() + } else { + true + }; + + if need_manifest { + // add some data + let data: &str = "placeholder manifest"; + + let mut codec = SvgCodec::new(&mut src); + codec.write_c2pa(dst, data.as_bytes())?; + } else { + // just clone + src.rewind()?; + dst.rewind()?; + 
std::io::copy(&mut src, &mut dst)?; + } + + Ok(()) +} + +impl Encode for SvgCodec { + fn write_c2pa(&mut self, dst: impl Write, c2pa: &[u8]) -> Result<(), CodecError> { + self.src.rewind()?; + let (_encoded_manifest, detected_tag_location, _insertion_point) = + detect_manifest_location(&mut self.src)?; + + self.src.rewind()?; + let buf_reader = BufReader::new(&mut self.src); + let mut reader = Reader::from_reader(buf_reader); + + let mut writer = Writer::new(dst); + + let mut buf = Vec::new(); + let mut xml_path: Vec = Vec::new(); + + match detected_tag_location { + DetectedTagsDepth::Metadata => { + // add manifest case + let manifest_data = create_manifest_tag(c2pa, false)?; + + loop { + match reader.read_event(&mut buf) { + Ok(Event::Start(e)) => { + let name = String::from_utf8_lossy(e.name()).into_owned(); + xml_path.push(name); + + // writes the event to the writer + writer.write_event(Event::Start(e)).map_err(|err| { + CodecError::InvalidAsset { + src: Some(err.to_string()), + context: "TODO".to_string(), + } + })?; + + // add manifest data + if xml_path.len() == 2 && xml_path[0] == SVG && xml_path[1] == METADATA + { + writer.write(&manifest_data).map_err(|err| { + CodecError::InvalidAsset { + src: Some(err.to_string()), + context: "TODO".to_string(), + } + })?; + } + } + Ok(Event::Eof) => break, + Ok(Event::End(e)) => { + let _p = xml_path.pop(); + writer.write_event(Event::End(e)).map_err(|err| { + CodecError::InvalidAsset { + src: Some(err.to_string()), + context: "TODO".to_string(), + } + })?; + } + Ok(e) => { + writer + .write_event(&e) + .map_err(|err| CodecError::InvalidAsset { + src: Some(err.to_string()), + context: "TODO".to_string(), + })? 
+ } + Err(err) => { + return Err(CodecError::InvalidAsset { + src: Some(err.to_string()), + context: "XML invalid".to_string(), + }) + } + } + buf.clear(); + } + } + DetectedTagsDepth::Manifest => { + // replace manifest case + let encoded = base64::encode(c2pa); + + loop { + match reader.read_event(&mut buf) { + Ok(Event::Start(e)) => { + let name = String::from_utf8_lossy(e.name()).into_owned(); + xml_path.push(name); + + // writes the event to the writer + writer.write_event(Event::Start(e)).map_err(|err| { + CodecError::InvalidAsset { + src: Some(err.to_string()), + context: "TODO".to_string(), + } + })?; + } + Ok(Event::Text(e)) => { + // add manifest data + if xml_path.len() == 3 + && xml_path[0] == SVG + && xml_path[1] == METADATA + && xml_path[2] == MANIFEST + { + writer.write(encoded.as_bytes()).map_err(|err| { + CodecError::InvalidAsset { + src: Some(err.to_string()), + context: "TODO".to_string(), + } + })?; + } else { + writer.write_event(Event::Text(e)).map_err(|err| { + CodecError::InvalidAsset { + src: Some(err.to_string()), + context: "TODO".to_string(), + } + })?; // pass Event through + } + } + Ok(Event::Eof) => break, + Ok(Event::End(e)) => { + let _p = xml_path.pop(); + writer.write_event(Event::End(e)).map_err(|err| { + CodecError::InvalidAsset { + src: Some(err.to_string()), + context: "TODO".to_string(), + } + })?; + } + Ok(e) => { + writer + .write_event(&e) + .map_err(|err| CodecError::InvalidAsset { + src: Some(err.to_string()), + context: "TODO".to_string(), + })? 
+ } + Err(err) => { + return Err(CodecError::InvalidAsset { + src: Some(err.to_string()), + context: "XML invalid".to_string(), + }) + } + } + buf.clear(); + } + } + DetectedTagsDepth::Empty => { + //add metadata & manifest case + let manifest_data = create_manifest_tag(c2pa, true)?; + + loop { + match reader.read_event(&mut buf) { + Ok(Event::Start(e)) => { + let name = String::from_utf8_lossy(e.name()).into_owned(); + xml_path.push(name); + + // writes the event to the writer + writer.write_event(Event::Start(e)).map_err(|err| { + CodecError::InvalidAsset { + src: Some(err.to_string()), + context: "TODO".to_string(), + } + })?; + + // add manifest data + if xml_path.len() == 1 && xml_path[0] == SVG { + writer.write(&manifest_data).map_err(|err| { + CodecError::InvalidAsset { + src: Some(err.to_string()), + context: "TODO".to_string(), + } + })?; + } + } + Ok(Event::Eof) => break, + Ok(Event::End(e)) => { + let _p = xml_path.pop(); + writer.write_event(Event::End(e)).map_err(|err| { + CodecError::InvalidAsset { + src: Some(err.to_string()), + context: "TODO".to_string(), + } + })?; + } + Ok(e) => { + writer + .write_event(&e) + .map_err(|err| CodecError::InvalidAsset { + src: Some(err.to_string()), + context: "TODO".to_string(), + })? 
+ } + Err(err) => { + return Err(CodecError::InvalidAsset { + src: Some(err.to_string()), + context: "XML invalid".to_string(), + }) + } + } + buf.clear(); + } + } + } + + Ok(()) + } + + fn remove_c2pa(&mut self, dst: impl Write) -> Result { + self.src.rewind()?; + + let buf_reader = BufReader::new(&mut self.src); + let mut reader = Reader::from_reader(buf_reader); + + let mut writer = Writer::new(dst); + + let mut buf = Vec::new(); + let mut xml_path: Vec = Vec::new(); + + let mut removed = false; + + loop { + match reader.read_event(&mut buf) { + Ok(Event::Start(e)) => { + let name = String::from_utf8_lossy(e.name()).into_owned(); + xml_path.push(name); + + if xml_path.len() == 3 + && xml_path[0] == SVG + && xml_path[1] == METADATA + && xml_path[2] == MANIFEST + { + removed = true; + // skip the manifest + continue; + } else { + writer.write_event(Event::Start(e)).map_err(|err| { + CodecError::InvalidAsset { + src: Some(err.to_string()), + context: "TODO".to_string(), + } + })?; // pass Event through + } + } + Ok(Event::Text(e)) => { + if xml_path.len() == 3 + && xml_path[0] == SVG + && xml_path[1] == METADATA + && xml_path[2] == MANIFEST + { + removed = true; + // skip the manifest + continue; + } else { + writer.write_event(Event::Text(e)).map_err(|err| { + CodecError::InvalidAsset { + src: Some(err.to_string()), + context: "TODO".to_string(), + } + })?; // pass Event through + } + } + Ok(Event::Eof) => break, + Ok(Event::End(e)) => { + if xml_path.len() == 3 + && xml_path[0] == SVG + && xml_path[1] == METADATA + && xml_path[2] == MANIFEST + { + removed = true; + // skip the manifest + let _p = xml_path.pop(); + continue; + } else { + let _p = xml_path.pop(); + writer.write_event(Event::End(e)).map_err(|err| { + CodecError::InvalidAsset { + src: Some(err.to_string()), + context: "TODO".to_string(), + } + })?; // pass Event through + } + } + Ok(e) => writer + .write_event(&e) + .map_err(|err| CodecError::InvalidAsset { + src: Some(err.to_string()), + context: 
"TODO".to_string(), + })?, + Err(err) => { + return Err(CodecError::InvalidAsset { + src: Some(err.to_string()), + context: "XML invalid".to_string(), + }) + } + } + buf.clear(); + } + + Ok(removed) + } +} + +impl EncodeInPlace for SvgCodec { + fn patch_c2pa(&mut self, c2pa: &[u8]) -> Result<(), CodecError> { + let (asset_manifest_opt, _detected_tag_location, insertion_point) = + detect_manifest_location(&mut self.src)?; + let encoded_store_bytes = base64::encode(c2pa); + + if let Some(manifest_bytes) = asset_manifest_opt { + // base 64 encode + let encoded_manifest_bytes = base64::encode(&manifest_bytes); + // can patch if encoded lengths are == + if encoded_store_bytes.len() == encoded_manifest_bytes.len() { + self.src.seek(SeekFrom::Start(insertion_point as u64))?; + self.src.write_all(encoded_store_bytes.as_bytes())?; + Ok(()) + } else { + Err(CodecError::InvalidPatchSize { + expected: encoded_manifest_bytes.len() as u64, + actual: encoded_store_bytes.len() as u64, + }) + } + } else { + Err(CodecError::NothingToPatch) + } + } +} + +impl Span for SvgCodec { + fn span(&mut self) -> Result { + Ok(DefaultSpan::Data(self.c2pa_span()?)) + } + + fn c2pa_span(&mut self) -> Result { + let output: Vec = Vec::new(); + let mut dst = Cursor::new(output); + + add_required_segs_to_stream(&mut self.src, &mut dst)?; + + let mut positions = Vec::new(); + + let (decoded_manifest_opt, _detected_tag_location, manifest_pos) = + detect_manifest_location(&mut dst)?; + + // TODO: if decoded_manifest_opt is None, we need to generate a placeholder + // remove unwrap!!! 
+ let decoded_manifest = decoded_manifest_opt.unwrap(); + let encoded_manifest_len = base64::encode(&decoded_manifest).len(); + + positions.push(ByteSpan { + start: manifest_pos as u64, + len: encoded_manifest_len as u64, + }); + + Ok(C2paSpan { spans: positions }) + } +} + +#[cfg(test)] +pub mod tests { + // #![allow(clippy::expect_used)] + // #![allow(clippy::panic)] + // #![allow(clippy::unwrap_used)] + + // use std::io::Read; + + // use tempfile::tempdir; + + // use super::*; + // use crate::utils::{ + // hash_utils::vec_compare, + // test::{fixture_path, temp_dir_path}, + // }; + + // #[test] + // fn test_write_svg_no_meta() { + // let more_data = "some more test data".as_bytes(); + // let source = fixture_path("sample1.svg"); + + // let mut success = false; + // if let Ok(temp_dir) = tempdir() { + // let output = temp_dir_path(&temp_dir, "sample1.svg"); + + // if let Ok(_size) = std::fs::copy(source, &output) { + // let svg_io = SvgCodec::new("svg"); + + // if let Ok(()) = svg_io.save_cai_store(&output, more_data) { + // if let Ok(read_test_data) = svg_io.read_cai_store(&output) { + // assert!(vec_compare(more_data, &read_test_data)); + // success = true; + // } + // } + // } + // } + // assert!(success) + // } + + // #[test] + // fn test_write_svg_with_meta() { + // let more_data = "some more test data".as_bytes(); + // let source = fixture_path("sample2.svg"); + + // let mut success = false; + // if let Ok(temp_dir) = tempdir() { + // let output = temp_dir_path(&temp_dir, "sample2.svg"); + + // if let Ok(_size) = std::fs::copy(source, &output) { + // let svg_io = SvgCodec::new("svg"); + + // if let Ok(()) = svg_io.save_cai_store(&output, more_data) { + // if let Ok(read_test_data) = svg_io.read_cai_store(&output) { + // assert!(vec_compare(more_data, &read_test_data)); + // success = true; + // } + // } + // } + // } + // assert!(success) + // } + + // #[test] + // fn test_write_svg_with_manifest() { + // let more_data = "some more test data into existing 
manifest".as_bytes(); + // let source = fixture_path("sample3.svg"); + + // let mut success = false; + // if let Ok(temp_dir) = tempdir() { + // let output = temp_dir_path(&temp_dir, "sample3.svg"); + + // if let Ok(_size) = std::fs::copy(source, &output) { + // let svg_io = SvgCodec::new("svg"); + + // if let Ok(()) = svg_io.save_cai_store(&output, more_data) { + // if let Ok(read_test_data) = svg_io.read_cai_store(&output) { + // assert!(vec_compare(more_data, &read_test_data)); + // success = true; + // } + // } + // } + // } + // assert!(success) + // } + + // #[test] + // fn test_patch_write_svg() { + // let test_data = "some test data".as_bytes(); + // let source = fixture_path("sample1.svg"); + + // let mut success = false; + // if let Ok(temp_dir) = tempdir() { + // let output = temp_dir_path(&temp_dir, "sample1.svg"); + + // if let Ok(_size) = std::fs::copy(source, &output) { + // let svg_io = SvgCodec::new("svg"); + + // if let Ok(()) = svg_io.save_cai_store(&output, test_data) { + // if let Ok(source_data) = svg_io.read_cai_store(&output) { + // // create replacement data of same size + // let mut new_data = vec![0u8; source_data.len()]; + // new_data[..test_data.len()].copy_from_slice(test_data); + // svg_io.patch_cai_store(&output, &new_data).unwrap(); + + // let replaced = svg_io.read_cai_store(&output).unwrap(); + + // assert_eq!(new_data, replaced); + + // success = true; + // } + // } + // } + // } + // assert!(success) + // } + + // #[test] + // fn test_remove_c2pa() { + // let source = fixture_path("sample4.svg"); + + // let temp_dir = tempdir().unwrap(); + // let output = temp_dir_path(&temp_dir, "sample4.svg"); + + // std::fs::copy(source, &output).unwrap(); + // let svg_io = SvgCodec::new("svg"); + + // svg_io.remove_cai_store(&output).unwrap(); + + // // read back in asset, JumbfNotFound is expected since it was removed + // match svg_io.read_cai_store(&output) { + // Err(Error::JumbfNotFound) => (), + // _ => unreachable!(), + // } + // } + 
+ // #[test] + // fn test_get_object_location() { + // let more_data = "some more test data into existing manifest".as_bytes(); + // let source = fixture_path("sample1.svg"); + + // let mut success = false; + // if let Ok(temp_dir) = tempdir() { + // let output = temp_dir_path(&temp_dir, "sample1.svg"); + + // if let Ok(_size) = std::fs::copy(source, &output) { + // let svg_io = SvgCodec::new("svg"); + + // if let Ok(()) = svg_io.save_cai_store(&output, more_data) { + // if let Ok(locations) = svg_io.get_object_locations(&output) { + // for op in locations { + // if op.htype == HashBlockObjectType::Cai { + // let mut of = File::open(&output).unwrap(); + + // let mut manifests_buf: Vec = vec![0u8; op.length]; + // of.seek(SeekFrom::Start(op.offset as u64)).unwrap(); + // of.read_exact(manifests_buf.as_mut_slice()).unwrap(); + // let buf_str = std::str::from_utf8(&manifests_buf).unwrap(); + // let decoded_data = base64::decode(buf_str).unwrap(); + // if vec_compare(more_data, &decoded_data) { + // success = true; + // } + // } + // } + // } + // } + // } + // } + // assert!(success) + // } +} diff --git a/sdk/crates/c2pa-codecs/src/codecs/tiff_io.rs b/sdk/crates/c2pa-codecs/src/codecs/tiff_io.rs new file mode 100644 index 000000000..dbd081b14 --- /dev/null +++ b/sdk/crates/c2pa-codecs/src/codecs/tiff_io.rs @@ -0,0 +1,1846 @@ +// Copyright 2023 Adobe. All rights reserved. +// This file is licensed to you under the Apache License, +// Version 2.0 (http://www.apache.org/licenses/LICENSE-2.0) +// or the MIT license (http://opensource.org/licenses/MIT), +// at your option. + +// Unless required by applicable law or agreed to in writing, +// this software is distributed on an "AS IS" BASIS, WITHOUT +// WARRANTIES OR REPRESENTATIONS OF ANY KIND, either express or +// implied. See the LICENSE-MIT and LICENSE-APACHE files for the +// specific language governing permissions and limitations under +// each license. 
/// The value type of an IFD entry.
///
/// Each discriminant is the numeric type code that [`IFDEntryType::from_u16`]
/// maps to that variant.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
enum IFDEntryType {
    Byte = 1,       // 8-bit unsigned integer
    Ascii = 2,      // 8-bit byte that contains a 7-bit ASCII code; the last byte must be zero
    Short = 3,      // 16-bit unsigned integer
    Long = 4,       // 32-bit unsigned integer
    Rational = 5,   // Fraction stored as two 32-bit unsigned integers
    Sbyte = 6,      // 8-bit signed integer
    Undefined = 7,  // 8-bit byte that may contain anything, depending on the field
    Sshort = 8,     // 16-bit signed integer
    Slong = 9,      // 32-bit signed integer
    Srational = 10, // Fraction stored as two 32-bit signed integers
    Float = 11,     // 32-bit IEEE floating point
    Double = 12,    // 64-bit IEEE floating point
    Ifd = 13,       // 32-bit unsigned integer (offset)
    Long8 = 16,     // BigTIFF 64-bit unsigned integer
    Slong8 = 17,    // BigTIFF 64-bit signed integer
    Ifd8 = 18,      // BigTIFF 64-bit unsigned integer (offset)
}

impl IFDEntryType {
    /// Converts a raw type code into its variant.
    ///
    /// Returns `None` for any code without a variant (for example 0, 14, 15
    /// or anything above 18).
    pub fn from_u16(val: u16) -> Option<Self> {
        match val {
            1 => Some(Self::Byte),
            2 => Some(Self::Ascii),
            3 => Some(Self::Short),
            4 => Some(Self::Long),
            5 => Some(Self::Rational),
            6 => Some(Self::Sbyte),
            7 => Some(Self::Undefined),
            8 => Some(Self::Sshort),
            9 => Some(Self::Slong),
            10 => Some(Self::Srational),
            11 => Some(Self::Float),
            12 => Some(Self::Double),
            13 => Some(Self::Ifd),
            16 => Some(Self::Long8),
            17 => Some(Self::Slong8),
            18 => Some(Self::Ifd8),
            _ => None,
        }
    }
}
+ R: Read + Seek + ?Sized, + { + let mut endianness = [0u8, 2]; + reader.read_exact(&mut endianness)?; + + let byte_order = match endianness { + II => Endianness::Little, + MM => Endianness::Big, + _ => { + return Err(Error::InvalidAsset( + "Could not parse input image".to_owned(), + )) + } + }; + + let mut byte_reader = ByteOrdered::runtime(reader, byte_order); + + let big_tiff = match byte_reader.read_u16() { + Ok(42) => false, + Ok(43) => { + // read Big TIFF structs + // Read byte size of offsets, must be 8 + if byte_reader.read_u16()? != 8 { + return Err(Error::InvalidAsset( + "Could not parse input image".to_owned(), + )); + } + // must currently be 0 + if byte_reader.read_u16()? != 0 { + return Err(Error::InvalidAsset( + "Could not parse input image".to_owned(), + )); + } + true + } + _ => { + return Err(Error::InvalidAsset( + "Could not parse input image".to_owned(), + )) + } + }; + + let first_ifd_offset = if big_tiff { + byte_reader.read_u64()? + } else { + byte_reader.read_u32()?.into() + }; + + // move read pointer to IFD + byte_reader.seek(SeekFrom::Start(first_ifd_offset))?; + let first_ifd = TiffStructure::read_ifd( + byte_reader.into_inner(), + byte_order, + big_tiff, + IfdType::Page, + )?; + + let ts = TiffStructure { + byte_order, + big_tiff, + first_ifd_offset, + first_ifd: Some(first_ifd), + }; + + Ok(ts) + } + + // read IFD entries, all value_offset are in source endianness + pub fn read_ifd_entries( + byte_reader: &mut ByteOrdered<&mut R, Endianness>, + big_tiff: bool, + entry_cnt: u64, + entries: &mut HashMap, + ) -> Result<()> + where + R: Read + Seek + ?Sized, + { + for _ in 0..entry_cnt { + let tag = byte_reader.read_u16()?; + let tag_type = byte_reader.read_u16()?; + + let (count, data_offset) = if big_tiff { + let count = byte_reader.read_u64()?; + let mut buf = [0; 8]; + byte_reader.read_exact(&mut buf)?; + + let data_offset = buf + .as_slice() + .read_u64::() + .map_err(|_err| Error::InvalidAsset("value out of range".to_string()))?; + 
(count, data_offset) + } else { + let count = byte_reader.read_u32()?; + let mut buf = [0; 4]; + byte_reader.read_exact(&mut buf)?; + + let data_offset = buf + .as_slice() + .read_u32::() + .map_err(|_err| Error::InvalidAsset("value out of range".to_string()))?; + (count.into(), data_offset.into()) + }; + + let ifd_entry = IfdEntry { + entry_tag: tag, + entry_type: tag_type, + value_count: count, + value_offset: data_offset, + }; + + /* + println!( + "{}, {}, {}. {:?}", + ifd_entry.entry_tag, + ifd_entry.entry_type, + ifd_entry.value_count, + ifd_entry.value_offset.to_ne_bytes() + ); + */ + + entries.insert(tag, ifd_entry); + } + + Ok(()) + } + + // read IFD from reader + pub fn read_ifd( + reader: &mut R, + byte_order: Endianness, + big_tiff: bool, + ifd_type: IfdType, + ) -> Result + where + R: Read + Seek + ReadBytesExt + ?Sized, + { + let mut byte_reader = ByteOrdered::runtime(reader, byte_order); + + let ifd_offset = byte_reader.stream_position()?; + //println!("IFD Offset: {:#x}", ifd_offset); + + let entry_cnt = if big_tiff { + byte_reader.read_u64()? + } else { + byte_reader.read_u16()?.into() + }; + + let mut ifd = ImageFileDirectory { + offset: ifd_offset, + entry_cnt, + ifd_type, + entries: HashMap::new(), + next_ifd_offset: None, + }; + + TiffStructure::read_ifd_entries(&mut byte_reader, big_tiff, entry_cnt, &mut ifd.entries)?; + + let next_ifd = if big_tiff { + byte_reader.read_u64()? 
+ } else { + byte_reader.read_u32()?.into() + }; + + match next_ifd { + 0 => (), + _ => ifd.next_ifd_offset = Some(next_ifd), + }; + + Ok(ifd) + } +} + +// offset are stored in source endianness so to use offset value in Seek calls we must convert to native endianness +fn decode_offset(offset_file_native: u64, endianness: Endianness, big_tiff: bool) -> Result { + let offset: u64; + let offset_bytes = offset_file_native.to_ne_bytes(); + let offset_reader = Cursor::new(offset_bytes); + + with_order!(offset_reader, endianness, |src| { + if big_tiff { + let o = src.read_u64()?; + offset = o; + } else { + let o = src.read_u32()?; + offset = o.into(); + } + }); + + Ok(offset) +} + +fn stream_len(reader: &mut dyn CAIRead) -> crate::Result { + let old_pos = reader.stream_position()?; + let len = reader.seek(SeekFrom::End(0))?; + + if old_pos != len { + reader.seek(SeekFrom::Start(old_pos))?; + } + + Ok(len) +} +// create tree of TIFF structure IFDs and IFD entries. +fn map_tiff(input: &mut R) -> Result<(Arena, Token, Endianness, bool)> +where + R: Read + Seek + ?Sized, +{ + let _size = input.seek(SeekFrom::End(0))?; + input.rewind()?; + + let ts = TiffStructure::load(input)?; + + let (tiff_tree, page_0): (Arena, Token) = if let Some(ifd) = + ts.first_ifd.clone() + { + let (mut tiff_tree, page_0_token) = Arena::with_data(ifd); + /* No multi-page at the moment + // get the pages + loop { + if let Some(next_ifd_offset) = &tiff_tree[current_token].data.next_ifd_offset { + input.seek(SeekFrom::Start(*next_ifd_offset))?; + + let next_ifd = TiffStructure::read_ifd(input, ts.byte_order, ts.big_tiff, IFDType::PageIFD)?; + + current_token = current_token.append(&mut tiff_tree, next_ifd) + + } else { + break; + } + } + */ + + // look for known special IFDs on page 0 + let page0_subifd = tiff_tree[page_0_token].data.get_tag(SUBFILE_TAG).copied(); + + // grab SubIFDs for page 0 (DNG) + if let Some(subifd) = page0_subifd { + let decoded_offset = decode_offset(subifd.value_offset, 
ts.byte_order, ts.big_tiff)?; + input.seek(SeekFrom::Start(decoded_offset))?; + + let num_longs = usize::value_from(subifd.value_count) + .map_err(|_err| Error::InvalidAsset("value out of range".to_string()))?; + let mut subfile_offsets = vec![0u32; num_longs]; // will contain offsets in native endianness + + if num_longs * 4 <= 4 || ts.big_tiff && num_longs * 4 <= 8 { + let offset_bytes = subifd.value_offset.to_ne_bytes(); + let offset_reader = Cursor::new(offset_bytes); + + with_order!(offset_reader, ts.byte_order, |src| { + for item in subfile_offsets.iter_mut().take(num_longs) { + let s = src.read_u32()?; // read a long from offset + *item = s; // write a long in output endian + } + }); + } else { + let mut buf = vec![0u8; num_longs * 4]; + input.read_exact(buf.as_mut_slice())?; + let offsets_buf = Cursor::new(buf); + + with_order!(offsets_buf, ts.byte_order, |src| { + for item in subfile_offsets.iter_mut().take(num_longs) { + let s = src.read_u32()?; // read a long from offset + *item = s; // write a long in output endian + } + }); + } + + // get all subfiles + for subfile_offset in subfile_offsets { + let u64_offset = u64::value_from(subfile_offset) + .map_err(|_err| Error::InvalidAsset("value out of range".to_string()))?; + input.seek(SeekFrom::Start(u64_offset))?; + + //println!("Reading SubIFD: {}", u64_offset); + + let subfile_ifd = + TiffStructure::read_ifd(input, ts.byte_order, ts.big_tiff, IfdType::Subfile)?; + let subfile_token = tiff_tree.new_node(subfile_ifd); + + page_0_token + .append_node(&mut tiff_tree, subfile_token) + .map_err(|_err| Error::InvalidAsset("Bad TIFF Structure".to_string()))?; + } + } + + // grab EXIF IFD for page 0 (DNG) + if let Some(exififd) = tiff_tree[page_0_token].data.get_tag(EXIFIFD_TAG) { + let decoded_offset = decode_offset(exififd.value_offset, ts.byte_order, ts.big_tiff)?; + input.seek(SeekFrom::Start(decoded_offset))?; + + //println!("EXIF Reading SubIFD: {}", decoded_offset); + + let exif_ifd = + 
TiffStructure::read_ifd(input, ts.byte_order, ts.big_tiff, IfdType::Exif)?; + let exif_token = tiff_tree.new_node(exif_ifd); + + page_0_token + .append_node(&mut tiff_tree, exif_token) + .map_err(|_err| Error::InvalidAsset("Bad TIFF Structure".to_string()))?; + } + + // grab GPS IFD for page 0 (DNG) + if let Some(gpsifd) = tiff_tree[page_0_token].data.get_tag(GPSIFD_TAG) { + let decoded_offset = decode_offset(gpsifd.value_offset, ts.byte_order, ts.big_tiff)?; + input.seek(SeekFrom::Start(decoded_offset))?; + + //println!("GPS Reading SubIFD: {}", decoded_offset); + + let gps_ifd = TiffStructure::read_ifd(input, ts.byte_order, ts.big_tiff, IfdType::Gps)?; + let gps_token = tiff_tree.new_node(gps_ifd); + + page_0_token + .append_node(&mut tiff_tree, gps_token) + .map_err(|_err| Error::InvalidAsset("Bad TIFF Structure".to_string()))?; + } + + (tiff_tree, page_0_token) + } else { + return Err(Error::InvalidAsset("TIFF structure invalid".to_string())); + }; + + Ok((tiff_tree, page_0, ts.byte_order, ts.big_tiff)) +} + +// struct used to clone source IFD entries. value_bytes are in target endianness +#[derive(Eq, PartialEq, Clone)] +pub(crate) struct IfdClonedEntry { + pub entry_tag: u16, + pub entry_type: u16, + pub value_count: u64, + pub value_bytes: Vec, +} + +// struct to clone a TIFF/DNG and new tags if desired +pub(crate) struct TiffCloner +where + T: Read + Write + Seek, +{ + endianness: Endianness, + big_tiff: bool, + first_idf_offset: u64, + writer: ByteOrdered, + additional_ifds: BTreeMap, +} + +impl TiffCloner { + pub fn new(endianness: Endianness, big_tiff: bool, writer: T) -> Result> { + let bo = ByteOrdered::runtime(writer, endianness); + + let mut tc = TiffCloner { + endianness, + big_tiff, + first_idf_offset: 0, + writer: bo, + additional_ifds: BTreeMap::new(), + }; + + tc.write_header()?; + + Ok(tc) + } + + fn offset(&mut self) -> Result { + Ok(self.writer.stream_position()?) 
+ } + + fn pad_word_boundary(&mut self) -> Result<()> { + let curr_offset = self.offset()?; + if curr_offset % 4 != 0 { + let padding = [0, 0, 0]; + let pad_len = 4 - (curr_offset % 4); + self.writer.write_all(&padding[..pad_len as usize])?; + } + + Ok(()) + } + + fn write_header(&mut self) -> Result { + let boi = match self.endianness { + Endianness::Big => 0x4d, + Endianness::Little => 0x49, + }; + let offset; + + if self.big_tiff { + self.writer.write_all(&[boi, boi])?; + self.writer.write_u16(43u16)?; + self.writer.write_u16(8u16)?; + self.writer.write_u16(0u16)?; + offset = self.writer.stream_position()?; // first ifd offset + + self.writer.write_u64(0)?; + } else { + self.writer.write_all(&[boi, boi])?; + self.writer.write_u16(42u16)?; + offset = self.writer.stream_position()?; // first ifd offset + + self.writer.write_u32(0)?; + } + + self.first_idf_offset = offset; + Ok(offset) + } + + fn write_entry_count(&mut self, count: usize) -> Result<()> { + if self.big_tiff { + let cnt = u64::value_from(count) + .map_err(|_err| Error::InvalidAsset("value out of range".to_string()))?; // get beginning of chunk which starts 4 bytes before label + + self.writer.write_u64(cnt)?; + } else { + let cnt = u16::value_from(count) + .map_err(|_err| Error::InvalidAsset("value out of range".to_string()))?; // get beginning of chunk which starts 4 bytes before label + + self.writer.write_u16(cnt)?; + } + + Ok(()) + } + + fn write_ifd(&mut self, target_ifd: &mut BTreeMap) -> Result { + // write out all data and save the offsets, skipping subfiles since the data is already written + for &mut IfdClonedEntry { + value_bytes: ref mut value_bytes_ref, + .. 
+ } in target_ifd.values_mut() + { + let data_bytes = if self.big_tiff { 8 } else { 4 }; + + if value_bytes_ref.len() > data_bytes { + // get location of entry data start + let offset = self.writer.stream_position()?; + + // write out the data bytes + self.writer.write_all(value_bytes_ref)?; + + // set offset pointer in file source endian + let mut offset_vec = vec![0; data_bytes]; + + with_order!(offset_vec.as_mut_slice(), self.endianness, |ew| { + if self.big_tiff { + ew.write_u64(offset)?; + } else { + let offset_u32 = u32::value_from(offset).map_err(|_err| { + Error::InvalidAsset("value out of range".to_string()) + })?; // get beginning of chunk which starts 4 bytes before label + + ew.write_u32(offset_u32)?; + } + }); + + // set to new data offset position + *value_bytes_ref = offset_vec; + } else { + while value_bytes_ref.len() < data_bytes { + value_bytes_ref.push(0); + } + } + } + + // Write out the IFD + + // start on a WORD boundary + self.pad_word_boundary()?; + + // save location of start of IFD + let ifd_offset = self.writer.stream_position()?; + + // write out the entry count + self.write_entry_count(target_ifd.len())?; + + // write out the directory entries + for (tag, entry) in target_ifd.iter() { + self.writer.write_u16(*tag)?; + self.writer.write_u16(entry.entry_type)?; + + if self.big_tiff { + let cnt = u64::value_from(entry.value_count) + .map_err(|_err| Error::InvalidAsset("value out of range".to_string()))?; + + self.writer.write_u64(cnt)?; + } else { + let cnt = u32::value_from(entry.value_count) + .map_err(|_err| Error::InvalidAsset("value out of range".to_string()))?; + + self.writer.write_u32(cnt)?; + } + + self.writer.write_all(&entry.value_bytes)?; + } + + Ok(ifd_offset) + } + + // add new TAG by supplying the IDF entry + pub fn add_target_tag(&mut self, entry: IfdClonedEntry) { + self.additional_ifds.insert(entry.entry_tag, entry); + } + + fn clone_image_data( + &mut self, + target_ifd: &mut BTreeMap, + asset_reader: &mut R, + ) -> 
Result<()> { + match ( + target_ifd.contains_key(&STRIPBYTECOUNTS), + target_ifd.contains_key(&STRIPOFFSETS), + target_ifd.contains_key(&TILEBYTECOUNTS), + target_ifd.contains_key(&TILEOFFSETS), + ) { + (true, true, false, false) => { + // stripped image data + let sbc_entry = target_ifd[&STRIPBYTECOUNTS].clone(); + let so_entry = target_ifd.get_mut(&STRIPOFFSETS).ok_or(Error::NotFound)?; + + // check for well formed TIFF + if so_entry.value_count != sbc_entry.value_count { + return Err(Error::InvalidAsset( + "TIFF strip count does not match strip offset count".to_string(), + )); + } + + let mut sbcs = vec![0u64; sbc_entry.value_count as usize]; + let mut dest_offsets: Vec = Vec::new(); + + // get the byte counts + with_order!(sbc_entry.value_bytes.as_slice(), self.endianness, |src| { + for c in &mut sbcs { + match sbc_entry.entry_type { + 4u16 => { + let s = src.read_u32()?; + *c = s.into(); + } + 3u16 => { + let s = src.read_u16()?; + *c = s.into(); + } + 16u16 => { + let s = src.read_u64()?; + *c = s; + } + _ => return Err(Error::InvalidAsset("invalid TIFF strip".to_string())), + } + } + }); + + // seek to end of file + self.writer.seek(SeekFrom::End(0))?; + + // copy the strips + with_order!(so_entry.value_bytes.as_slice(), self.endianness, |src| { + for c in sbcs.iter() { + let cnt = usize::value_from(*c).map_err(|_err| { + Error::InvalidAsset("value out of range".to_string()) + })?; + + // get the offset + let so: u64 = match so_entry.entry_type { + 4u16 => { + let s = src.read_u32()?; + s.into() + } + 3u16 => { + let s = src.read_u16()?; + s.into() + } + 16u16 => src.read_u64()?, + _ => return Err(Error::InvalidAsset("invalid TIFF strip".to_string())), + }; + + let dest_offset = self.writer.stream_position()?; + dest_offsets.push(dest_offset); + + // copy the strip to new file + let mut data = vec![0u8; cnt]; + asset_reader.seek(SeekFrom::Start(so))?; + asset_reader.read_exact(data.as_mut_slice())?; + self.writer.write_all(data.as_slice())?; + } + }); + + // 
patch the offsets + with_order!( + so_entry.value_bytes.as_mut_slice(), + self.endianness, + |dest| { + for o in dest_offsets.iter() { + // get the offset + match so_entry.entry_type { + 4u16 => { + let offset = u32::value_from(*o).map_err(|_err| { + Error::InvalidAsset("value out of range".to_string()) + })?; + dest.write_u32(offset)?; + } + 3u16 => { + let offset = u16::value_from(*o).map_err(|_err| { + Error::InvalidAsset("value out of range".to_string()) + })?; + dest.write_u16(offset)?; + } + 16u16 => { + let offset = *o; + dest.write_u64(offset)?; + } + _ => { + return Err(Error::InvalidAsset( + "invalid TIFF strip".to_string(), + )) + } + } + } + } + ); + } + (false, false, true, true) => { + // tiled image data + let tbc_entry = target_ifd[&TILEBYTECOUNTS].clone(); + let to_entry = target_ifd.get_mut(&TILEOFFSETS).ok_or(Error::NotFound)?; + + // check for well formed TIFF + if to_entry.value_count != tbc_entry.value_count { + return Err(Error::InvalidAsset( + "TIFF tile count does not match tile offset count".to_string(), + )); + } + + let mut tbcs = vec![0u64; tbc_entry.value_count as usize]; + let mut dest_offsets: Vec = Vec::new(); + + // get the byte counts + with_order!(tbc_entry.value_bytes.as_slice(), self.endianness, |src| { + for val in &mut tbcs { + match tbc_entry.entry_type { + 4u16 => { + let s = src.read_u32()?; + *val = s.into(); + } + 3u16 => { + let s = src.read_u16()?; + *val = s.into(); + } + 16u16 => { + let s = src.read_u64()?; + *val = s; + } + _ => return Err(Error::InvalidAsset("invalid TIFF tile".to_string())), + } + } + }); + + // seek to end of file + self.writer.seek(SeekFrom::End(0))?; + + // copy the tiles + with_order!(to_entry.value_bytes.as_slice(), self.endianness, |src| { + for c in tbcs.iter() { + let cnt = usize::value_from(*c).map_err(|_err| { + Error::InvalidAsset("value out of range".to_string()) + })?; + + // get the offset + let to: u64 = match to_entry.entry_type { + 4u16 => { + let s = src.read_u32()?; + s.into() 
/// Clones every child IFD of `page` into the output and reports where each one landed.
///
/// For each child returned by `page.children(tiff_tree)` this copies the IFD entries,
/// copies the image data, writes the directory and then writes a zero "next IFD"
/// value. The offset returned by `write_ifd` is collected according to the child's
/// `ifd_type`.
///
/// Returns a map with one entry for each of `SUBFILE_TAG`, `EXIFIFD_TAG` and
/// `GPSIFD_TAG`; a list is empty when no child of that kind exists.
///
/// Any error from cloning entries, cloning image data or writing is propagated.
// NOTE(review): the type arguments in this signature and in the local annotations
// appear to be missing in this copy of the file — confirm against the upstream source.
fn clone_sub_files(
    &mut self,
    tiff_tree: &Arena,
    page: Token,
    asset_reader: &mut R,
) -> Result>> {
    // offset map: parent tag -> offsets of the sub-IFDs written for that tag
    let mut offset_map: HashMap> = HashMap::new();

    let mut offsets_ifd: Vec = Vec::new();
    let mut offsets_exif: Vec = Vec::new();
    let mut offsets_gps: Vec = Vec::new();

    // clone the EXIF entry and DNG entries
    for n in page.children(tiff_tree) {
        let ifd = &n.data;

        // clone IFD entries
        let mut cloned_ifd = self.clone_ifd_entries(&ifd.entries, asset_reader)?;

        // clone the image data
        self.clone_image_data(&mut cloned_ifd, asset_reader)?;

        // write directory
        let sub_ifd_offset = self.write_ifd(&mut cloned_ifd)?;

        // terminate since we don't support chained subifd
        // (the terminator is 8 bytes wide when `big_tiff` is set, 4 bytes otherwise)
        if self.big_tiff {
            self.writer.write_u64(0)?;
        } else {
            self.writer.write_u32(0)?;
        }

        // fix up offset in main page known IFDs
        // (`Page` children are written but their offset is not recorded)
        match ifd.ifd_type {
            IfdType::Page => (),
            IfdType::Subfile => offsets_ifd.push(sub_ifd_offset),
            IfdType::Exif => offsets_exif.push(sub_ifd_offset),
            IfdType::Gps => offsets_gps.push(sub_ifd_offset),
        };
    }

    offset_map.insert(SUBFILE_TAG, offsets_ifd);
    offset_map.insert(EXIFIFD_TAG, offsets_exif);
    offset_map.insert(GPSIFD_TAG, offsets_gps);

    Ok(offset_map)
}
/// Builds a cloned copy of every entry in `entries`, with each entry's value bytes
/// loaded into memory.
///
/// For each `(tag, entry)` the entry type is decoded with `IFDEntryType::from_u16`
/// and a buffer of `value_count * element size` bytes is filled in one of two ways:
///
/// * if the value is small enough (at most 4 bytes, or at most 8 bytes when
///   `self.big_tiff` is set) it is taken from the bytes of `entry.value_offset`
///   itself; this path exists only for the 1-, 2- and 4-byte element types;
/// * otherwise `entry.value_offset` is decoded with `decode_offset` and the bytes
///   are read from `asset_reader` at that position.
///
/// The result maps each tag to an `IfdClonedEntry` holding the tag, the numeric
/// type, the original count and the collected bytes.
///
/// Returns `Error::UnsupportedType` for an unrecognised entry type and
/// `Error::InvalidAsset("value out of range")` when a count does not fit in `usize`;
/// seek/read failures are propagated.
// NOTE(review): the type arguments in this signature, on `target_ifd`, and on the
// `read_*::()` / `write_*::()` calls appear to be missing in this copy of the file —
// confirm against the upstream source.
// NOTE(review): the 8-byte element types (Rational, Srational, Double, Long8/Ifd8,
// Slong8) always go through the offset path, even when `self.big_tiff` is set and a
// single value would fit in 8 bytes — confirm this matches how `value_offset` is
// populated for BigTIFF.
// NOTE(review): the `count * size` multiplications are unchecked — confirm counts
// are validated before they reach this function.
fn clone_ifd_entries(
    &mut self,
    entries: &HashMap,
    asset_reader: &mut R,
) -> Result> {
    let mut target_ifd: BTreeMap = BTreeMap::new();

    for (tag, entry) in entries {
        let target_endianness = self.writer.endianness();

        // get bytes for tag
        let cnt = entry.value_count;
        let et = entry.entry_type;

        let entry_type = IFDEntryType::from_u16(et).ok_or(Error::UnsupportedType)?;

        // read IFD raw data in file native endian format
        let data = match entry_type {
            // single-byte element types: copied byte-for-byte
            IFDEntryType::Byte
            | IFDEntryType::Sbyte
            | IFDEntryType::Undefined
            | IFDEntryType::Ascii => {
                let num_bytes = usize::value_from(cnt)
                    .map_err(|_err| Error::InvalidAsset("value out of range".to_string()))?;

                let mut data = vec![0u8; num_bytes];

                if num_bytes <= 4 || self.big_tiff && num_bytes <= 8 {
                    // value is stored in the offset field itself
                    let offset_bytes = entry.value_offset.to_ne_bytes();
                    for (i, item) in offset_bytes.iter().take(num_bytes).enumerate() {
                        data[i] = *item;
                    }
                } else {
                    // move to start of data
                    asset_reader.seek(SeekFrom::Start(decode_offset(
                        entry.value_offset,
                        target_endianness,
                        self.big_tiff,
                    )?))?;
                    asset_reader.read_exact(data.as_mut_slice())?;
                }

                data
            }
            // 2-byte unsigned values
            IFDEntryType::Short => {
                let num_shorts = usize::value_from(cnt)
                    .map_err(|_err| Error::InvalidAsset("value out of range".to_string()))?;
                let mut data = vec![0u8; num_shorts * 2];

                if num_shorts * 2 <= 4 || self.big_tiff && num_shorts * 2 <= 8 {
                    // value is stored in the offset field itself
                    let offset_bytes = entry.value_offset.to_ne_bytes();
                    let mut offset_reader = Cursor::new(offset_bytes);

                    let mut w = Cursor::new(data.as_mut_slice());
                    for _i in 0..num_shorts {
                        let s = offset_reader.read_u16::()?; // read a short from offset
                        w.write_u16::(s)?; // write a short in output endian
                    }
                } else {
                    // move to start of data
                    asset_reader.seek(SeekFrom::Start(decode_offset(
                        entry.value_offset,
                        target_endianness,
                        self.big_tiff,
                    )?))?;
                    asset_reader.read_exact(data.as_mut_slice())?;
                }

                data
            }
            // 4-byte unsigned values and 4-byte IFD pointers
            IFDEntryType::Long | IFDEntryType::Ifd => {
                let num_longs = usize::value_from(cnt)
                    .map_err(|_err| Error::InvalidAsset("value out of range".to_string()))?;
                let mut data = vec![0u8; num_longs * 4];

                if num_longs * 4 <= 4 || self.big_tiff && num_longs * 4 <= 8 {
                    // value is stored in the offset field itself
                    let offset_bytes = entry.value_offset.to_ne_bytes();
                    let mut offset_reader = Cursor::new(offset_bytes);

                    let mut w = Cursor::new(data.as_mut_slice());
                    for _i in 0..num_longs {
                        let s = offset_reader.read_u32::()?; // read a long from offset
                        w.write_u32::(s)?; // write a long in output endian
                    }
                } else {
                    // move to start of data
                    asset_reader.seek(SeekFrom::Start(decode_offset(
                        entry.value_offset,
                        target_endianness,
                        self.big_tiff,
                    )?))?;
                    asset_reader.read_exact(data.as_mut_slice())?;
                }

                data
            }
            // 8 bytes per value: always read through the offset
            IFDEntryType::Rational => {
                let num_rationals = usize::value_from(cnt)
                    .map_err(|_err| Error::InvalidAsset("value out of range".to_string()))?;
                let mut data = vec![0u8; num_rationals * 8];

                // move to start of data
                asset_reader.seek(SeekFrom::Start(decode_offset(
                    entry.value_offset,
                    target_endianness,
                    self.big_tiff,
                )?))?;

                asset_reader.read_exact(data.as_mut_slice())?;

                data
            }
            // 2-byte signed values
            IFDEntryType::Sshort => {
                let num_sshorts = usize::value_from(cnt)
                    .map_err(|_err| Error::InvalidAsset("value out of range".to_string()))?;
                let mut data = vec![0u8; num_sshorts * 2];

                if num_sshorts * 2 <= 4 || self.big_tiff && num_sshorts * 2 <= 8 {
                    // value is stored in the offset field itself
                    let offset_bytes = entry.value_offset.to_ne_bytes();
                    let mut offset_reader = Cursor::new(offset_bytes);

                    let mut w = Cursor::new(data.as_mut_slice());
                    for _i in 0..num_sshorts {
                        let s = offset_reader.read_i16::()?; // read a short from offset
                        w.write_i16::(s)?; // write a short in output endian
                    }
                } else {
                    // move to start of data
                    asset_reader.seek(SeekFrom::Start(decode_offset(
                        entry.value_offset,
                        target_endianness,
                        self.big_tiff,
                    )?))?;
                    asset_reader.read_exact(data.as_mut_slice())?;
                }

                data
            }
            // 4-byte signed values
            IFDEntryType::Slong => {
                let num_slongs = usize::value_from(cnt)
                    .map_err(|_err| Error::InvalidAsset("value out of range".to_string()))?;
                let mut data = vec![0u8; num_slongs * 4];

                if num_slongs * 4 <= 4 || self.big_tiff && num_slongs * 4 <= 8 {
                    // value is stored in the offset field itself
                    let offset_bytes = entry.value_offset.to_ne_bytes();
                    let mut offset_reader = Cursor::new(offset_bytes);

                    let mut w = Cursor::new(data.as_mut_slice());
                    for _i in 0..num_slongs {
                        let s = offset_reader.read_i32::()?; // read a slong from offset
                        w.write_i32::(s)?; // write a slong in output endian
                    }
                } else {
                    // move to start of data
                    asset_reader.seek(SeekFrom::Start(decode_offset(
                        entry.value_offset,
                        target_endianness,
                        self.big_tiff,
                    )?))?;
                    asset_reader.read_exact(data.as_mut_slice())?;
                }

                data
            }
            // 8 bytes per value: always read through the offset
            IFDEntryType::Srational => {
                let num_srationals = usize::value_from(cnt)
                    .map_err(|_err| Error::InvalidAsset("value out of range".to_string()))?;
                let mut data = vec![0u8; num_srationals * 8];

                // move to start of data
                asset_reader.seek(SeekFrom::Start(decode_offset(
                    entry.value_offset,
                    target_endianness,
                    self.big_tiff,
                )?))?;
                asset_reader.read_exact(data.as_mut_slice())?;

                data
            }
            // 4-byte floating point values
            IFDEntryType::Float => {
                let num_floats = usize::value_from(cnt)
                    .map_err(|_err| Error::InvalidAsset("value out of range".to_string()))?;
                let mut data = vec![0u8; num_floats * 4];

                if num_floats * 4 <= 4 || self.big_tiff && num_floats * 4 <= 8 {
                    // value is stored in the offset field itself
                    let offset_bytes = entry.value_offset.to_ne_bytes();
                    let mut offset_reader = Cursor::new(offset_bytes);

                    let mut w = Cursor::new(data.as_mut_slice());
                    for _i in 0..num_floats {
                        let s = offset_reader.read_f32::()?; // read a float from offset
                        w.write_f32::(s)?; // write a float in output endian
                    }
                } else {
                    // move to start of data
                    asset_reader.seek(SeekFrom::Start(decode_offset(
                        entry.value_offset,
                        target_endianness,
                        self.big_tiff,
                    )?))?;
                    asset_reader.read_exact(data.as_mut_slice())?;
                }

                data
            }
            // 8 bytes per value: always read through the offset
            IFDEntryType::Double => {
                let num_doubles = usize::value_from(cnt)
                    .map_err(|_err| Error::InvalidAsset("value out of range".to_string()))?;
                let mut data = vec![0u8; num_doubles * 8];

                // move to start of data
                asset_reader.seek(SeekFrom::Start(decode_offset(
                    entry.value_offset,
                    target_endianness,
                    self.big_tiff,
                )?))?;
                asset_reader.read_exact(data.as_mut_slice())?;

                data
            }
            // 8 bytes per value: always read through the offset
            IFDEntryType::Long8 | IFDEntryType::Ifd8 => {
                let num_long8s = usize::value_from(cnt)
                    .map_err(|_err| Error::InvalidAsset("value out of range".to_string()))?;
                let mut data = vec![0u8; num_long8s * 8];

                // move to start of data
                asset_reader.seek(SeekFrom::Start(decode_offset(
                    entry.value_offset,
                    target_endianness,
                    self.big_tiff,
                )?))?;
                asset_reader.read_exact(data.as_mut_slice())?;

                data
            }
            // 8 bytes per value: always read through the offset
            IFDEntryType::Slong8 => {
                let num_slong8s = usize::value_from(cnt)
                    .map_err(|_err| Error::InvalidAsset("value out of range".to_string()))?;
                let mut data = vec![0u8; num_slong8s * 8];

                // move to start of data
                asset_reader.seek(SeekFrom::Start(decode_offset(
                    entry.value_offset,
                    target_endianness,
                    self.big_tiff,
                )?))?;
                asset_reader.read_exact(data.as_mut_slice())?;

                data
            }
        };

        // record the clone under the same tag, keeping the original element count
        target_ifd.insert(
            *tag,
            IfdClonedEntry {
                entry_tag: *tag,
                entry_type: entry_type as u16,
                value_count: cnt,
                value_bytes: data,
            },
        );
    }

    Ok(target_ifd)
}
W, + asset_reader: &mut R, + tiff_tags: Vec, +) -> Result<()> { + let (mut tiff_tree, page_0, endianness, big_tiff) = map_tiff(asset_reader)?; + + let mut bo = ByteOrdered::new(writer, endianness); + + let mut tc = TiffCloner::new(endianness, big_tiff, &mut bo)?; + + for t in tiff_tags { + tc.add_target_tag(t); + } + + tc.clone_tiff(&mut tiff_tree, page_0, asset_reader)?; + + Ok(()) +} +fn add_required_tags_to_stream( + input_stream: &mut dyn CAIRead, + output_stream: &mut dyn CAIReadWrite, +) -> Result<()> { + let tiff_io = TiffIO {}; + + match tiff_io.read_cai(input_stream) { + Ok(_) => { + // just clone + input_stream.rewind()?; + output_stream.rewind()?; + std::io::copy(input_stream, output_stream)?; + Ok(()) + } + Err(Error::JumbfNotFound) => { + // allocate enough bytes so that value is not stored in offset field + let some_bytes = vec![0u8; 10]; + let tio = TiffIO {}; + tio.write_cai(input_stream, output_stream, &some_bytes) + } + Err(e) => Err(e), + } +} + +fn get_cai_data(asset_reader: &mut R) -> Result> +where + R: Read + Seek + ?Sized, +{ + let (tiff_tree, page_0, e, big_tiff) = map_tiff(asset_reader)?; + + let first_ifd = &tiff_tree[page_0].data; + + let cai_ifd_entry = first_ifd.get_tag(C2PA_TAG).ok_or(Error::JumbfNotFound)?; + + // make sure data type is for unstructured data + if cai_ifd_entry.entry_type != C2PA_FIELD_TYPE { + return Err(Error::InvalidAsset( + "Ifd entry for C2PA must be type UNDEFINED(7)".to_string(), + )); + } + + // move read point to start of entry + let decoded_offset = decode_offset(cai_ifd_entry.value_offset, e, big_tiff)?; + asset_reader.seek(SeekFrom::Start(decoded_offset))?; + + let manifest_len: usize = usize::value_from(cai_ifd_entry.value_count) + .map_err(|_err| Error::InvalidAsset("TIFF/DNG out of range".to_string()))?; + + let mut data = vec![0u8; manifest_len]; + + asset_reader + .read_exact(data.as_mut_slice()) + .map_err(|_err| Error::InvalidAsset("TIFF/DNG out of range".to_string()))?; + + Ok(data) +} + +fn 
get_xmp_data(asset_reader: &mut R) -> Option> +where + R: Read + Seek + ?Sized, +{ + let (tiff_tree, page_0, e, big_tiff) = map_tiff(asset_reader).ok()?; + let first_ifd = &tiff_tree[page_0].data; + + let xmp_ifd_entry = match first_ifd.get_tag(XMP_TAG) { + Some(entry) => entry, + None => return None, + }; + + // make sure the tag type is correct + if IFDEntryType::from_u16(xmp_ifd_entry.entry_type)? != IFDEntryType::Byte { + return None; + } + + // move read point to start of entry + let decoded_offset = decode_offset(xmp_ifd_entry.value_offset, e, big_tiff).ok()?; + asset_reader.seek(SeekFrom::Start(decoded_offset)).ok()?; + + let xmp_len: usize = usize::value_from(xmp_ifd_entry.value_count).ok()?; + + let mut data = vec![0u8; xmp_len]; + + asset_reader.read_exact(data.as_mut_slice()).ok()?; + + Some(data) +} +pub struct TiffIO {} + +impl CAIReader for TiffIO { + fn read_cai(&self, asset_reader: &mut dyn CAIRead) -> Result> { + let cai_data = get_cai_data(asset_reader)?; + Ok(cai_data) + } + + fn read_xmp(&self, asset_reader: &mut dyn CAIRead) -> Option { + let xmp_data = get_xmp_data(asset_reader)?; + String::from_utf8(xmp_data).ok() + } +} + +impl AssetIO for TiffIO { + fn asset_patch_ref(&self) -> Option<&dyn AssetPatch> { + Some(self) + } + + fn read_cai_store(&self, asset_path: &std::path::Path) -> Result> { + let mut reader = std::fs::File::open(asset_path)?; + + self.read_cai(&mut reader) + } + + fn save_cai_store(&self, asset_path: &std::path::Path, store_bytes: &[u8]) -> Result<()> { + let mut input_stream = std::fs::OpenOptions::new() + .read(true) + .open(asset_path) + .map_err(Error::IoError)?; + + let mut temp_file = Builder::new() + .prefix("c2pa_temp") + .rand_bytes(5) + .tempfile()?; + + self.write_cai(&mut input_stream, &mut temp_file, store_bytes)?; + + // copy temp file to asset + rename_or_move(temp_file, asset_path) + } + + fn get_object_locations( + &self, + asset_path: &std::path::Path, + ) -> Result> { + let mut input_stream = + 
std::fs::File::open(asset_path).map_err(|_err| Error::EmbeddingError)?; + + self.get_object_locations_from_stream(&mut input_stream) + } + + fn remove_cai_store(&self, asset_path: &std::path::Path) -> Result<()> { + let mut input_file = std::fs::File::open(asset_path)?; + + let mut temp_file = Builder::new() + .prefix("c2pa_temp") + .rand_bytes(5) + .tempfile()?; + + self.remove_cai_store_from_stream(&mut input_file, &mut temp_file)?; + + // copy temp file to asset + rename_or_move(temp_file, asset_path) + } + + fn new(_asset_type: &str) -> Self + where + Self: Sized, + { + TiffIO {} + } + + fn get_handler(&self, asset_type: &str) -> Box { + Box::new(TiffIO::new(asset_type)) + } + + fn get_reader(&self) -> &dyn CAIReader { + self + } + + fn get_writer(&self, asset_type: &str) -> Option> { + Some(Box::new(TiffIO::new(asset_type))) + } + + fn remote_ref_writer_ref(&self) -> Option<&dyn RemoteRefEmbed> { + Some(self) + } + + fn composed_data_ref(&self) -> Option<&dyn ComposedManifestRef> { + Some(self) + } + + fn supported_types(&self) -> &[&str] { + &SUPPORTED_TYPES + } +} + +impl CAIWriter for TiffIO { + fn write_cai( + &self, + input_stream: &mut dyn CAIRead, + output_stream: &mut dyn CAIReadWrite, + store_bytes: &[u8], + ) -> Result<()> { + let l = u64::value_from(store_bytes.len()) + .map_err(|_err| Error::InvalidAsset("value out of range".to_string()))?; + + let entry = IfdClonedEntry { + entry_tag: C2PA_TAG, + entry_type: C2PA_FIELD_TYPE, + value_count: l, + value_bytes: store_bytes.to_vec(), + }; + + tiff_clone_with_tags(output_stream, input_stream, vec![entry]) + } + + fn get_object_locations_from_stream( + &self, + input_stream: &mut dyn CAIRead, + ) -> Result> { + let len = stream_len(input_stream)?; + let vec_cap = usize::value_from(len) + .map_err(|_err| Error::InvalidAsset("value out of range".to_owned()))?; + let output_buf: Vec = Vec::with_capacity(vec_cap + 100); + + let mut output_stream = Cursor::new(output_buf); + + 
add_required_tags_to_stream(input_stream, &mut output_stream)?; + output_stream.rewind()?; + + let (idfs, first_idf_token, e, big_tiff) = map_tiff(&mut output_stream)?; + + let cai_ifd_entry = match idfs[first_idf_token].data.get_tag(C2PA_TAG) { + Some(ifd) => ifd, + None => return Ok(Vec::new()), + }; + + // make sure data type is for unstructured data + if cai_ifd_entry.entry_type != C2PA_FIELD_TYPE { + return Err(Error::InvalidAsset( + "Ifd entry for C2PA must be type UNKNOWN(7)".to_string(), + )); + } + + let decoded_offset = decode_offset(cai_ifd_entry.value_offset, e, big_tiff)?; + let manifest_offset = usize::value_from(decoded_offset) + .map_err(|_err| Error::InvalidAsset("TIFF/DNG out of range".to_string()))?; + let manifest_len = usize::value_from(cai_ifd_entry.value_count) + .map_err(|_err| Error::InvalidAsset("TIFF/DNG out of range".to_string()))?; + + Ok(vec![HashObjectPositions { + offset: manifest_offset, + length: manifest_len, + htype: HashBlockObjectType::Cai, + }]) + } + + fn remove_cai_store_from_stream( + &self, + input_stream: &mut dyn CAIRead, + output_stream: &mut dyn CAIReadWrite, + ) -> Result<()> { + let (mut idfs, page_0, e, big_tiff) = map_tiff(input_stream)?; + + let mut bo = ByteOrdered::new(output_stream, e); + let mut tc = TiffCloner::new(e, big_tiff, &mut bo)?; + + idfs[page_0].data.entries.remove(&C2PA_TAG); + tc.clone_tiff(&mut idfs, page_0, input_stream)?; + Ok(()) + } +} + +impl AssetPatch for TiffIO { + fn patch_cai_store(&self, asset_path: &std::path::Path, store_bytes: &[u8]) -> Result<()> { + let mut asset_io = OpenOptions::new() + .write(true) + .read(true) + .create(false) + .open(asset_path)?; + + let (tiff_tree, page_0, e, big_tiff) = map_tiff(&mut asset_io)?; + + let first_ifd = &tiff_tree[page_0].data; + + let cai_ifd_entry = first_ifd.get_tag(C2PA_TAG).ok_or(Error::JumbfNotFound)?; + + // make sure data type is for unstructured data + if cai_ifd_entry.entry_type != C2PA_FIELD_TYPE { + return Err(Error::InvalidAsset( 
+ "Ifd entry for C2PA must be type UNKNOWN(7)".to_string(), + )); + } + + let manifest_len: usize = usize::value_from(cai_ifd_entry.value_count) + .map_err(|_err| Error::InvalidAsset("TIFF/DNG out of range".to_string()))?; + + if store_bytes.len() == manifest_len { + // move read point to start of entry + let decoded_offset = decode_offset(cai_ifd_entry.value_offset, e, big_tiff)?; + asset_io.seek(SeekFrom::Start(decoded_offset))?; + + asset_io.write_all(store_bytes)?; + Ok(()) + } else { + Err(Error::InvalidAsset( + "patch_cai_store store size mismatch.".to_string(), + )) + } + } +} + +impl RemoteRefEmbed for TiffIO { + #[allow(unused_variables)] + fn embed_reference( + &self, + asset_path: &Path, + embed_ref: crate::asset_io::RemoteRefEmbedType, + ) -> Result<()> { + match embed_ref { + crate::asset_io::RemoteRefEmbedType::Xmp(manifest_uri) => { + let output_buf = Vec::new(); + let mut output_stream = Cursor::new(output_buf); + + // block so that source file is closed after embed + { + let mut source_stream = std::fs::File::open(asset_path)?; + self.embed_reference_to_stream( + &mut source_stream, + &mut output_stream, + RemoteRefEmbedType::Xmp(manifest_uri), + )?; + } + + // write will replace exisiting contents + output_stream.rewind()?; + std::fs::write(asset_path, output_stream.into_inner())?; + Ok(()) + } + crate::asset_io::RemoteRefEmbedType::StegoS(_) => Err(Error::UnsupportedType), + crate::asset_io::RemoteRefEmbedType::StegoB(_) => Err(Error::UnsupportedType), + crate::asset_io::RemoteRefEmbedType::Watermark(_) => Err(Error::UnsupportedType), + } + } + + fn embed_reference_to_stream( + &self, + source_stream: &mut dyn CAIRead, + output_stream: &mut dyn CAIReadWrite, + embed_ref: RemoteRefEmbedType, + ) -> Result<()> { + match embed_ref { + crate::asset_io::RemoteRefEmbedType::Xmp(manifest_uri) => { + let xmp = match self.get_reader().read_xmp(source_stream) { + Some(xmp) => add_provenance(&xmp, &manifest_uri)?, + None => { + let xmp = 
format!("http://ns.adobe.com/xap/1.0/\0 {}", MIN_XMP); + add_provenance(&xmp, &manifest_uri)? + } + }; + + let l = u64::value_from(xmp.len()) + .map_err(|_err| Error::InvalidAsset("value out of range".to_string()))?; + + let entry = IfdClonedEntry { + entry_tag: XMP_TAG, + entry_type: IFDEntryType::Byte as u16, + value_count: l, + value_bytes: xmp.as_bytes().to_vec(), + }; + tiff_clone_with_tags(output_stream, source_stream, vec![entry]) + } + crate::asset_io::RemoteRefEmbedType::StegoS(_) => Err(Error::UnsupportedType), + crate::asset_io::RemoteRefEmbedType::StegoB(_) => Err(Error::UnsupportedType), + crate::asset_io::RemoteRefEmbedType::Watermark(_) => Err(Error::UnsupportedType), + } + } +} + +impl ComposedManifestRef for TiffIO { + // Return entire CAI block as Vec + fn compose_manifest(&self, manifest_data: &[u8], _format: &str) -> Result> { + Ok(manifest_data.to_vec()) + } +} + +#[cfg(test)] +pub mod tests { + #![allow(clippy::panic)] + #![allow(clippy::unwrap_used)] + + use core::panic; + + use tempfile::tempdir; + + use super::*; + use crate::utils::test::temp_dir_path; + + #[test] + fn test_read_write_manifest() { + let data = "some data"; + + let source = crate::utils::test::fixture_path("TUSCANY.TIF"); + + let temp_dir = tempdir().unwrap(); + let output = temp_dir_path(&temp_dir, "test.tif"); + + std::fs::copy(source, &output).unwrap(); + + let tiff_io = TiffIO {}; + + // save data to tiff + tiff_io.save_cai_store(&output, data.as_bytes()).unwrap(); + + // read data back + let loaded = tiff_io.read_cai_store(&output).unwrap(); + + assert_eq!(&loaded, data.as_bytes()); + } + + #[test] + fn test_write_xmp() { + let data = "some data"; + + let source = crate::utils::test::fixture_path("TUSCANY.TIF"); + + let temp_dir = tempdir().unwrap(); + let output = temp_dir_path(&temp_dir, "test.tif"); + + std::fs::copy(source, &output).unwrap(); + + let tiff_io = TiffIO {}; + + // save data to tiff + let eh = tiff_io.remote_ref_writer_ref().unwrap(); + 
/// Removing a manifest store works both when none is present and after one was saved.
#[test]
fn test_remove_manifest() {
    let manifest = "some data".as_bytes();

    // work on a private copy of the fixture
    let temp_dir = tempdir().unwrap();
    let output = temp_dir_path(&temp_dir, "test.tif");
    std::fs::copy(crate::utils::test::fixture_path("TUSCANY.TIF"), &output).unwrap();

    let tiff_io = TiffIO {};

    // first make sure that calling this without a manifest does not error
    tiff_io.remove_cai_store(&output).unwrap();

    // save the store, then confirm it round-trips
    tiff_io.save_cai_store(&output, manifest).unwrap();
    let loaded = tiff_io.read_cai_store(&output).unwrap();
    assert_eq!(&loaded, manifest);

    // after removal the reader must report that no store exists
    tiff_io.remove_cai_store(&output).unwrap();
    assert!(
        matches!(
            tiff_io.read_cai_store(&output),
            Err(Error::JumbfNotFound)
        ),
        "should be no C2PA store"
    );
}
/// Invokes the macro named by `$macro` with the comma-separated list of built-in
/// codec types (`C2paCodec, GifCodec, SvgCodec,`).
///
/// The callback receives the list with a trailing comma, so its matcher must
/// accept one.
macro_rules! codec_list {
    ($macro:ident) => {
        // Forward to the callback that was passed in. The transcriber previously
        // named `$macro_id`, which is not bound by the matcher, so the callback was
        // never invoked.
        $macro! {
            C2paCodec, GifCodec, SvgCodec,
        }
    };
}
// NOTE(review): the generic parameters on this impl and the type arguments of the
// return type appear to be missing in this copy of the file — confirm against the
// upstream source.
impl Codec {
    /// Builds a codec by probing the start of `src`.
    ///
    /// The stream is rewound, wrapped in a `BufReader` whose capacity is
    /// `Codec::MAX_SIGNATURE_LEN`, and the buffer is filled once before the codec
    /// macros are invoked.
    // NOTE(review): the two macro invocations below look unfinished —
    // `codec_list!(codec_from)` is not followed by `;`, and `codec_list!()` is
    // passed where `codec_from!` expects a list of codec names. Confirm the
    // intended expansion before relying on this function.
    pub fn from_stream(mut src: R) -> Result {
        src.rewind()?;
        // shadow `src` with a buffered reader sized to hold one signature
        let mut src = BufReader::with_capacity(Codec::MAX_SIGNATURE_LEN, src);
        // pre-load the buffer; the returned slice is not used here
        src.fill_buf()?;

        codec_list!(codec_from)
        codec_from!(&mut src, supports_stream, codec_list!())
    }
}
+// TODO: add other codecs +// TODO: users should wrap it in their own BufReader, don't include in impl (like svg) + +pub enum Codec { + C2pa(C2paCodec), + Gif(GifCodec), + Svg(SvgCodec), + Jpeg(JpegCodec), + External(E), +} + +impl Codec { + pub fn from_stream(mut src: R) -> Result { + src.rewind()?; + let mut signature = vec![0; Codec::MAX_SIGNATURE_LEN]; + src.read_exact(&mut signature)?; + + // TODO: if one of these methods error, then skip it + // TODO: also need to rewind streams in the case of svg + if C2paCodec::supports_signature(&signature) { + Ok(Self::C2pa(C2paCodec::new(src))) + } else if GifCodec::supports_signature(&signature) { + Ok(Self::Gif(GifCodec::new(src))) + } else if JpegCodec::supports_signature(&signature) { + Ok(Self::Jpeg(JpegCodec::new(src))) + } else { + src.rewind()?; + if SvgCodec::supports_stream(&mut src)? { + Ok(Self::Svg(SvgCodec::new(src))) + } else { + Err(CodecError::UnknownFormat) + } + } + } + + pub fn from_extension(extension: &str, src: R) -> Result { + if C2paCodec::supports_extension(extension) { + Ok(Self::C2pa(C2paCodec::new(src))) + } else if GifCodec::supports_extension(extension) { + Ok(Self::Gif(GifCodec::new(src))) + } else if SvgCodec::supports_extension(extension) { + Ok(Self::Svg(SvgCodec::new(src))) + } else if JpegCodec::supports_extension(extension) { + Ok(Self::Jpeg(JpegCodec::new(src))) + } else { + Err(CodecError::UnknownFormat) + } + } + + pub fn from_mime(mime: &str, src: R) -> Result { + if C2paCodec::supports_mime(mime) { + Ok(Self::C2pa(C2paCodec::new(src))) + } else if GifCodec::supports_mime(mime) { + Ok(Self::Gif(GifCodec::new(src))) + } else if SvgCodec::supports_mime(mime) { + Ok(Self::Svg(SvgCodec::new(src))) + } else if JpegCodec::supports_mime(mime) { + Ok(Self::Jpeg(JpegCodec::new(src))) + } else { + Err(CodecError::UnknownFormat) + } + } +} + +impl Codec { + pub fn from_external(external: E) -> Self { + Self::External(external) + } +} + +impl Encode for Codec { + fn write_c2pa(&mut self, 
dst: impl Write, c2pa: &[u8]) -> Result<(), CodecError> { + match self { + Codec::Gif(codec) => codec.write_c2pa(dst, c2pa), + Codec::C2pa(codec) => codec.write_c2pa(dst, c2pa), + Codec::Svg(codec) => codec.write_c2pa(dst, c2pa), + Codec::Jpeg(codec) => codec.write_c2pa(dst, c2pa), + Codec::External(codec) => codec.write_c2pa(dst, c2pa), + } + } + + fn remove_c2pa(&mut self, dst: impl Write) -> Result { + match self { + Codec::Gif(codec) => codec.remove_c2pa(dst), + Codec::C2pa(codec) => codec.remove_c2pa(dst), + Codec::Svg(codec) => codec.remove_c2pa(dst), + Codec::Jpeg(codec) => codec.remove_c2pa(dst), + Codec::External(codec) => codec.remove_c2pa(dst), + } + } + + fn write_xmp(&mut self, dst: impl Write, xmp: &str) -> Result<(), CodecError> { + match self { + Codec::Gif(codec) => codec.write_xmp(dst, xmp), + Codec::C2pa(codec) => codec.write_xmp(dst, xmp), + Codec::Svg(codec) => codec.write_xmp(dst, xmp), + Codec::Jpeg(codec) => codec.write_xmp(dst, xmp), + Codec::External(codec) => codec.write_xmp(dst, xmp), + } + } +} + +impl EncodeInPlace for Codec { + fn patch_c2pa(&mut self, c2pa: &[u8]) -> Result<(), CodecError> { + match self { + Codec::Gif(codec) => codec.patch_c2pa(c2pa), + Codec::C2pa(codec) => codec.patch_c2pa(c2pa), + Codec::Svg(codec) => codec.patch_c2pa(c2pa), + // TODO: + Codec::Jpeg(_) => Err(CodecError::Unsupported), + Codec::External(codec) => codec.patch_c2pa(c2pa), + } + } +} + +impl Decode for Codec { + fn read_c2pa(&mut self) -> Result>, CodecError> { + match self { + Codec::Gif(codec) => codec.read_c2pa(), + Codec::C2pa(codec) => codec.read_c2pa(), + Codec::Svg(codec) => codec.read_c2pa(), + Codec::Jpeg(codec) => codec.read_c2pa(), + Codec::External(codec) => codec.read_c2pa(), + } + } + + fn read_xmp(&mut self) -> Result, CodecError> { + match self { + Codec::Gif(codec) => codec.read_xmp(), + Codec::C2pa(codec) => codec.read_xmp(), + Codec::Svg(codec) => codec.read_xmp(), + Codec::Jpeg(codec) => codec.read_xmp(), + Codec::External(codec) 
=> codec.read_xmp(), + } + } +} + +impl Embed for Codec { + fn embeddable(bytes: &[u8]) -> Result { + Err(CodecError::Unsupported) + } + + fn embed(&mut self, embeddable: Embeddable, dst: impl Write) -> Result<(), CodecError> { + match self { + Codec::Gif(codec) => codec.embed(embeddable, dst), + Codec::C2pa(codec) => codec.embed(embeddable, dst), + Codec::Svg(codec) => codec.embed(embeddable, dst), + Codec::Jpeg(codec) => codec.embed(embeddable, dst), + Codec::External(codec) => codec.embed(embeddable, dst), + } + } +} + +impl Span for Codec { + fn span(&mut self) -> Result { + match self { + Codec::Gif(codec) => codec.span(), + Codec::C2pa(codec) => codec.span(), + Codec::Svg(codec) => codec.span(), + Codec::Jpeg(codec) => codec.span(), + Codec::External(codec) => codec.span(), + } + } + + fn c2pa_span(&mut self) -> Result { + match self { + Codec::Gif(codec) => codec.c2pa_span(), + Codec::C2pa(codec) => codec.c2pa_span(), + Codec::Svg(codec) => codec.c2pa_span(), + Codec::Jpeg(codec) => codec.c2pa_span(), + Codec::External(codec) => codec.c2pa_span(), + } + } + + fn box_span(&mut self) -> Result { + match self { + Codec::Gif(codec) => codec.box_span(), + Codec::C2pa(codec) => codec.box_span(), + Codec::Svg(codec) => codec.box_span(), + Codec::Jpeg(codec) => codec.box_span(), + Codec::External(codec) => codec.box_span(), + } + } + + fn bmff_span(&mut self) -> Result { + match self { + Codec::Gif(codec) => codec.bmff_span(), + Codec::C2pa(codec) => codec.bmff_span(), + Codec::Svg(codec) => codec.bmff_span(), + Codec::Jpeg(codec) => codec.bmff_span(), + Codec::External(codec) => codec.bmff_span(), + } + } + + fn collection_span(&mut self) -> Result { + match self { + Codec::Gif(codec) => codec.collection_span(), + Codec::C2pa(codec) => codec.collection_span(), + Codec::Svg(codec) => codec.collection_span(), + Codec::Jpeg(codec) => codec.collection_span(), + Codec::External(codec) => codec.collection_span(), + } + } +} + +impl Support for Codec<()> { + // TODO: find 
max signature len among all codecs via Support::MAX_SIGNATURE_LEN + const MAX_SIGNATURE_LEN: usize = 13; + + fn supports_signature(signature: &[u8]) -> bool { + GifCodec::supports_signature(signature) + || C2paCodec::supports_signature(signature) + || JpegCodec::supports_signature(signature) + } + + fn supports_stream(mut src: impl Read + Seek) -> Result { + src.rewind()?; + let mut signature = vec![0; Codec::MAX_SIGNATURE_LEN]; + src.read_exact(&mut signature)?; + + match Codec::supports_signature(&signature) { + true => Ok(true), + false => { + src.rewind()?; + SvgCodec::supports_stream(src) + } + } + } + + fn supports_extension(extension: &str) -> bool { + GifCodec::supports_extension(extension) + || C2paCodec::supports_extension(extension) + || SvgCodec::supports_extension(extension) + || JpegCodec::supports_extension(extension) + } + + fn supports_mime(mime: &str) -> bool { + GifCodec::supports_mime(mime) + || SvgCodec::supports_mime(mime) + || C2paCodec::supports_mime(mime) + || JpegCodec::supports_mime(mime) + } +} + +#[derive(Debug, Error)] +pub enum CodecError { + // NOTE: unsupported refers to a function that is explicitly not supported in the spec + #[error("TODO")] + Unsupported, + + // NOTE: whereas, unimplemented is not yet implemented, but is supported in the spec + #[error("TODO")] + Unimplemented, + + #[error("Unknown format while creating the Codec.")] + UnknownFormat, + + #[error("Incorrect file format for the codec.")] + IncorrectFormat, + + #[error("Attempted to patch a file without an existing manifest.")] + NothingToPatch, + + #[error("Invalid size of patch, expected {expected}, got {actual}.")] + InvalidPatchSize { expected: u64, actual: u64 }, + + #[error("More than one C2PA manifest was found inside the file.")] + MoreThanOneC2pa, + + // This case occurs, for instance, when the magic trailer at the end of an XMP block in a GIF + // does not conform to spec or the string is not valid UTF-8. 
+ #[error("XMP was found, but failed to validate.")] + InvalidXmpBlock, + + #[error("TODO")] + InvalidAsset { + src: Option, + context: String, + }, + + #[error("Attempted to seek out of bounds.")] + SeekOutOfBounds(num::TryFromIntError), + + // TODO: use quick_xml + // TODO: it may be more ideal to convert this error to a string, the user most likely doesn't care the exact type + // and we don't want to add an external API to our API + // This occurs when we fail to parse the XML in the XMP string. + #[error("TODO")] + XmpParseError(#[source] fast_xml::Error), + + #[error("TODO")] + IoError(#[from] io::Error), +} diff --git a/sdk/crates/c2pa-codecs/src/protocols.rs b/sdk/crates/c2pa-codecs/src/protocols.rs new file mode 100644 index 000000000..0ee8ed49b --- /dev/null +++ b/sdk/crates/c2pa-codecs/src/protocols.rs @@ -0,0 +1,227 @@ +use std::io::{BufReader, Read, Seek, Write}; + +use crate::{xmp, CodecError}; + +// NOTE: the reason encoders/decoders take &mut self and no src is because they take them on construction. +// in a normal gif signing flow, we read, write, read, then write again. There's a lot of info we can cache. +// TODO: document stream position behavior, it should assume it starts where requested and there is no guarantee on where it ends, the caller can handle restoration +pub trait Encode { + // TODO: should we require this function to search for existing c2pa manifests? + /// Writes the C2PA block with the specified manifest or replaces it if it already exists. + fn write_c2pa(&mut self, dst: impl Write, c2pa: &[u8]) -> Result<(), CodecError>; + + /// Removes the C2PA block from the stream or returns false if a C2PA block was not found. 
+ fn remove_c2pa(&mut self, dst: impl Write) -> Result; + + fn write_xmp(&mut self, dst: impl Write, xmp: &str) -> Result<(), CodecError> { + let _ = dst; + let _ = xmp; + Err(CodecError::Unimplemented) + } + + fn write_xmp_provenance(&mut self, dst: impl Write, provenance: &str) -> Result<(), CodecError> + where + Self: Decode, + { + let existing_xmp = self + .read_xmp()? + .unwrap_or_else(|| format!("http://ns.adobe.com/xap/1.0/\0 {}", xmp::MIN_XMP)); + self.write_xmp(dst, &xmp::add_provenance(&existing_xmp, provenance)?) + } + + fn remove_xmp(&mut self, dst: impl Write, xmp: &str) -> Result<(), CodecError> { + let _ = dst; + let _ = xmp; + Err(CodecError::Unimplemented) + } + + fn remove_xmp_provenance(&mut self, dst: impl Write) -> Result<(), CodecError> + where + Self: Decode, + { + todo!() + } +} + +pub trait EncodeInPlace { + /// Replaces the C2PA block with the specified manifest ONLY if the given manifest is the same exact + /// size as the existing C2PA block. + /// + /// If no C2PA block was found, then errors with [`CodecError::NothingToPatch`]. + /// If the size of the found C2PA block differs, then errors with [`CodecError::InvalidPatchSize`]. + fn patch_c2pa(&mut self, c2pa: &[u8]) -> Result<(), CodecError>; +} + +pub trait Decode { + fn read_c2pa(&mut self) -> Result>, CodecError>; + + fn read_xmp(&mut self) -> Result, CodecError> { + Err(CodecError::Unimplemented) + } + + fn read_xmp_provenance(&mut self) -> Result, CodecError> { + todo!() + } +} + +pub trait Embed { + fn embeddable(bytes: &[u8]) -> Result; + + // fn read_embeddable(&mut self) -> Embeddable; + + fn embed(&mut self, embeddable: Embeddable, dst: impl Write) -> Result<(), CodecError>; +} + +pub trait Span { + fn span(&mut self) -> Result; + + // TODO: document that if there is no c2pa manifest it should return where it should be + // TODO: what happens if a data hash has multiple placeholder locations? how does the code know where to hash? 
+ fn c2pa_span(&mut self) -> Result { + Err(CodecError::Unimplemented) + } + + // TODO: read above + fn box_span(&mut self) -> Result { + Err(CodecError::Unimplemented) + } + + fn bmff_span(&mut self) -> Result { + Err(CodecError::Unimplemented) + } + + fn collection_span(&mut self) -> Result { + Err(CodecError::Unimplemented) + } +} + +pub trait Support { + const MAX_SIGNATURE_LEN: usize; + + fn supports_signature(signature: &[u8]) -> bool { + let _ = signature; + false + } + + // Not all file types support a signature (e.g. SVG), but some can be inferred based + // on their structure. That operation is likely expensive, which is why we separate it + // into a supports_stream method. + fn supports_stream(src: impl Read + Seek) -> Result { + let _ = src; + Err(CodecError::Unimplemented) + } + + // fn supports_signature_from_stream(mut src: impl Read) -> Result { + // let mut signature = Vec::with_capacity(Self::MAX_SIGNATURE_LEN); + // src.read_exact(&mut signature)?; + // Self::supports_signature(&signature) + // } + + fn supports_extension(extension: &str) -> bool; + + fn supports_mime(mime: &str) -> bool; +} + +#[derive(Debug)] +pub struct Embeddable { + pub bytes: Vec, +} + +#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)] +pub struct ByteSpan { + pub start: u64, + pub len: u64, +} + +#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)] +pub struct NamedByteSpan { + pub names: Vec, + pub span: ByteSpan, +} + +#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)] +pub struct C2paSpan { + /// Span of bytes that encompass the manifest with specifical consideration + /// for some formats defined in the spec. + pub spans: Vec, +} + +#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)] +pub struct BoxSpan { + /// Span of bytes for each block, corresponding to their box name as defined + /// in the spec. 
+ pub spans: Vec, +} + +#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)] +pub struct BmffSpan { + // TODO +} + +#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)] +pub struct CollectionSpan { + pub zip_central_directory_span: Option, + pub uri_spans: Vec, +} + +#[derive(Debug)] +pub enum DefaultSpan { + Data(C2paSpan), + Box(BoxSpan), + Bmff(BmffSpan), + Collection(CollectionSpan), +} + +impl Encode for () { + fn write_c2pa(&mut self, dst: impl Write, c2pa: &[u8]) -> Result<(), CodecError> { + Err(CodecError::Unsupported) + } + + fn remove_c2pa(&mut self, dst: impl Write) -> Result { + Err(CodecError::Unsupported) + } +} + +impl EncodeInPlace for () { + fn patch_c2pa(&mut self, c2pa: &[u8]) -> Result<(), CodecError> { + Err(CodecError::Unsupported) + } +} + +impl Decode for () { + fn read_c2pa(&mut self) -> Result>, CodecError> { + Err(CodecError::Unsupported) + } +} + +impl Embed for () { + fn embeddable(bytes: &[u8]) -> Result { + Err(CodecError::Unsupported) + } + + fn embed(&mut self, embeddable: Embeddable, dst: impl Write) -> Result<(), CodecError> { + Err(CodecError::Unsupported) + } +} + +impl Span for () { + fn span(&mut self) -> Result { + Err(CodecError::Unsupported) + } +} + +impl Support for () { + const MAX_SIGNATURE_LEN: usize = 0; + + fn supports_stream(src: impl Read + Seek) -> Result { + Ok(false) + } + + fn supports_extension(extension: &str) -> bool { + false + } + + fn supports_mime(mime: &str) -> bool { + false + } +} diff --git a/sdk/crates/c2pa-codecs/src/xmp.rs b/sdk/crates/c2pa-codecs/src/xmp.rs new file mode 100644 index 000000000..e0ed71275 --- /dev/null +++ b/sdk/crates/c2pa-codecs/src/xmp.rs @@ -0,0 +1,254 @@ +// Copyright 2022 Adobe. All rights reserved. +// This file is licensed to you under the Apache License, +// Version 2.0 (http://www.apache.org/licenses/LICENSE-2.0) +// or the MIT license (http://opensource.org/licenses/MIT), +// at your option. 
+ +// Unless required by applicable law or agreed to in writing, +// this software is distributed on an "AS IS" BASIS, WITHOUT +// WARRANTIES OR REPRESENTATIONS OF ANY KIND, either express or +// implied. See the LICENSE-MIT and LICENSE-APACHE files for the +// specific language governing permissions and limitations under +// each license. + +use std::io::{Cursor, Read, Seek}; + +use fast_xml::{ + events::{BytesStart, Event}, + Reader, Writer, +}; + +use crate::{Codec, Decode, CodecError}; + +const RDF_DESCRIPTION: &[u8] = b"rdf:Description"; + +pub const MIN_XMP: &str = r#" "#; + +#[derive(Default)] +pub struct XmpInfo { + pub document_id: Option, + pub instance_id: Option, + pub provenance: Option, +} + +impl XmpInfo { + /// search xmp data for provenance, documentID and instanceID + pub fn from_source(src: impl Read + Seek) -> Result, CodecError> { + match Codec::from_stream(src)?.read_xmp()? { + Some(xmp) => { + Ok(Some(Self { + // todo: do this in one pass through XMP + document_id: extract_document_id(&xmp), + instance_id: extract_instance_id(&xmp), + provenance: extract_provenance(&xmp), + })) + } + None => Ok(None), + } + } +} + +/// Extract an a value from XMP using a key +fn extract_xmp_key(xmp: &str, key: &str) -> Option { + let mut reader = Reader::from_str(xmp); + reader.trim_text(true); + let mut buf = Vec::new(); + + loop { + match reader.read_event(&mut buf) { + Ok(Event::Start(ref e)) | Ok(Event::Empty(ref e)) => { + if e.name() == RDF_DESCRIPTION { + // attribute case + let value = e.attributes().find(|a| { + if let Ok(attribute) = a { + attribute.key == key.as_bytes() + } else { + false + } + }); + if let Some(Ok(attribute)) = value { + if let Ok(s) = String::from_utf8(attribute.value.to_vec()) { + return Some(s); + } + } + } else if e.name() == key.as_bytes() { + // tag case + let mut buf: Vec = Vec::new(); + if let Ok(s) = reader.read_text(e.name(), &mut buf) { + return Some(s); + } + } + } + Ok(Event::Eof) => break, + _ => {} + } + 
buf.clear(); + } + None +} + +// writes the event to the writer) +/// Add a value to XMP using a key, replaces the value if the key exists +fn add_xmp_key(xmp: &str, key: &str, value: &str) -> Result { + let mut reader = Reader::from_str(xmp); + reader.trim_text(true); + let mut writer = Writer::new_with_indent(Cursor::new(Vec::new()), b' ', 2); + let mut buf = Vec::new(); + let mut added = false; + loop { + let event = reader + .read_event(&mut buf) + .map_err(CodecError::XmpParseError)?; + // println!("{:?}", event); + match event { + Event::Start(ref e) if e.name() == RDF_DESCRIPTION => { + // creates a new element + let mut elem = BytesStart::owned(RDF_DESCRIPTION.to_vec(), RDF_DESCRIPTION.len()); + + for attr in e.attributes() { + match attr { + Ok(attr) => { + if attr.key == key.as_bytes() { + // replace the key/value if it exists + elem.push_attribute((key, value)); + added = true; + } else { + // add all other existing elements + elem.extend_attributes([attr]); + } + } + Err(e) => { + return Err(CodecError::XmpParseError(fast_xml::Error::InvalidAttr(e))); + } + } + } + if !added { + // didn't exist, so add it + elem.push_attribute((key, value)); + } + // writes the event to the writer + writer + .write_event(Event::Start(elem)) + .map_err(CodecError::XmpParseError)?; + } + Event::Empty(ref e) if e.name() == RDF_DESCRIPTION => { + // creates a new element + let mut elem = BytesStart::owned(RDF_DESCRIPTION.to_vec(), RDF_DESCRIPTION.len()); + for attr in e.attributes() { + match attr { + Ok(attr) => { + if attr.key == key.as_bytes() { + // replace the key/value if it exists + elem.push_attribute((key, value)); + added = true; + } else { + // add all other existing elements + elem.extend_attributes([attr]); + } + } + Err(e) => { + return Err(CodecError::XmpParseError(fast_xml::Error::InvalidAttr(e))); + } + } + } + if !added { + // didn't exist, so add it + elem.push_attribute((key, value)); + } + // writes the event to the writer + writer + 
.write_event(Event::Empty(elem)) + .map_err(CodecError::XmpParseError)?; + } + Event::Eof => break, + e => { + writer.write_event(e).map_err(CodecError::XmpParseError)?; + } + } + } + buf.clear(); + let result = writer.into_inner().into_inner(); + String::from_utf8(result).map_err(|_| CodecError::InvalidXmpBlock) +} + +/// extract the dc:provenance value from xmp +pub fn extract_provenance(xmp: &str) -> Option { + extract_xmp_key(xmp, "dcterms:provenance") +} + +/// extract the xmpMM:InstanceID value from xmp +fn extract_instance_id(xmp: &str) -> Option { + extract_xmp_key(xmp, "xmpMM:InstanceID") +} + +/// extract the "xmpMM:DocumentID" value from xmp +fn extract_document_id(xmp: &str) -> Option { + extract_xmp_key(xmp, "xmpMM:DocumentID") +} + +/// add or replace a dc:provenance value to xmp, including dc:terms if needed +pub fn add_provenance(xmp: &str, provenance: &str) -> Result { + let xmp = add_xmp_key(xmp, "xmlns:dcterms", "http://purl.org/dc/terms/")?; + add_xmp_key(&xmp, "dcterms:provenance", provenance) +} + +#[cfg(test)] +mod tests { + #![allow(clippy::expect_used)] + #![allow(clippy::unwrap_used)] + + //use env_logger; + use super::*; + + const XMP_DATA: &str = r#" + + + + + + "#; + + const PROVENANCE: &str = + "self#jumbf=c2pa/contentauth:urn:uuid:a58065fb-79ae-4eb3-87b9-a19830860059/c2pa.claim"; + + #[test] + fn read_xmp() { + let provenance = extract_provenance(XMP_DATA); + assert_eq!(provenance, Some("self#jumbf=c2pa/contentauth:urn:uuid:a58065fb-79ae-4eb3-87b9-a19830860059/c2pa.claim".to_owned())); + let document_id = extract_document_id(XMP_DATA); + assert_eq!( + document_id, + Some("xmp.did:cb9f5498-bb58-4572-8043-8c369e6bfb9b".to_owned()) + ); + let instance_id = extract_instance_id(XMP_DATA); + assert_eq!( + instance_id, + Some("xmp.iid:cb9f5498-bb58-4572-8043-8c369e6bfb9b".to_owned()) + ); + let unicorn = extract_xmp_key(XMP_DATA, "unicorn"); + assert_eq!(unicorn, None); + let bad_xmp = extract_xmp_key("bad xmp", "unicorn"); + 
assert_eq!(bad_xmp, None); + } + + #[test] + fn add_xmp() { + let xmp = add_provenance(XMP_DATA, PROVENANCE).expect("adding provenance"); + let unicorn = extract_provenance(&xmp); + println!("{xmp}"); + assert_eq!(unicorn, Some(PROVENANCE.to_string())); + + let xmp = add_provenance(MIN_XMP, PROVENANCE).expect("adding provenance"); + let unicorn = extract_provenance(&xmp); + println!("{xmp}"); + assert_eq!(unicorn, Some(PROVENANCE.to_string())); + } +} diff --git a/sdk/crates/c2pa-codecs/tests/c2pa.rs b/sdk/crates/c2pa-codecs/tests/c2pa.rs new file mode 100644 index 000000000..979e86c18 --- /dev/null +++ b/sdk/crates/c2pa-codecs/tests/c2pa.rs @@ -0,0 +1,187 @@ +use std::io::{self, Cursor}; + +use c2pa_codecs::{Codec, CodecError, Decode, Encode, EncodeInPlace}; +use common::{ASSETS, RANDOM_JUMBF_BYTES1, RANDOM_JUMBF_BYTES2, RANDOM_JUMBF_BYTES3}; + +mod common; + +#[test] +fn test_c2pa_read() -> Result<(), CodecError> { + for asset in ASSETS { + if !asset.supports_read_c2pa { + continue; + } + + let mut src = Cursor::new(asset.bytes); + + // Read the c2pa (none should exist). + let mut codec = Codec::from_stream(&mut src)?; + assert!(matches!(codec.read_c2pa(), Ok(None))); + } + Ok(()) +} + +#[test] +fn test_c2pa_write() -> Result<(), CodecError> { + for asset in ASSETS { + if !asset.supports_read_c2pa || !asset.supports_write_c2pa { + continue; + } + + let mut src = Cursor::new(asset.bytes); + + // Read the c2pa (none should exist). + let mut codec = Codec::from_stream(&mut src)?; + assert!(matches!(codec.read_c2pa(), Ok(None))); + + // Write random bytes. + let random_bytes = RANDOM_JUMBF_BYTES1; + let mut dst = Cursor::new(Vec::new()); + assert!(matches!(codec.write_c2pa(&mut dst, random_bytes), Ok(()))); + + // Read the c2pa. 
+ let mut codec = Codec::from_stream(&mut dst)?; + assert_eq!(codec.read_c2pa()?.as_deref(), Some(random_bytes)); + } + Ok(()) +} + +#[test] +fn test_c2pa_replace() -> Result<(), CodecError> { + for asset in ASSETS { + if !asset.supports_read_c2pa || !asset.supports_remove_c2pa || !asset.supports_write_c2pa { + continue; + } + + let mut src = Cursor::new(asset.bytes); + + // Read the c2pa (none should exist). + let mut codec = Codec::from_stream(&mut src)?; + assert!(matches!(codec.read_c2pa(), Ok(None))); + + // Write random bytes. + let random_bytes = RANDOM_JUMBF_BYTES1; + let mut dst = Cursor::new(Vec::new()); + assert!(matches!(codec.write_c2pa(&mut dst, random_bytes), Ok(()))); + + // Write some more random bytes (should replace). + let random_bytes = RANDOM_JUMBF_BYTES2; + let mut codec = Codec::from_stream(&mut dst)?; + let mut dst = Cursor::new(Vec::new()); + assert!(matches!(codec.write_c2pa(&mut dst, random_bytes), Ok(()))); + + // Read the new replaced c2pa. + let mut codec = Codec::from_stream(&mut dst)?; + assert_eq!(codec.read_c2pa()?.as_deref(), Some(random_bytes)); + + // Remove the replaced c2pa (should exist). + let mut codec = Codec::from_stream(&mut dst)?; + let mut dst = Cursor::new(Vec::new()); + assert!(codec.remove_c2pa(&mut dst)?); + + // Read the c2pa (none should exist). + let mut codec = Codec::from_stream(&mut dst)?; + assert_eq!(codec.read_c2pa()?.as_deref(), None); + + // TODO: svg isn't cleaning up the entire c2pa block on remove! + // Ensure dst is back to src. + // assert_eq!(src.into_inner(), dst.into_inner()); + } + Ok(()) +} + +#[test] +fn test_c2pa_remove() -> Result<(), CodecError> { + for asset in ASSETS { + if !asset.supports_read_c2pa || !asset.supports_remove_c2pa || !asset.supports_write_c2pa { + continue; + } + + let mut src = Cursor::new(asset.bytes); + + // Read the c2pa (none should exist). 
+ let mut codec = Codec::from_stream(&mut src)?; + assert!(matches!(codec.read_c2pa(), Ok(None))); + + // Remove the c2pa (none should be found). + assert!(!codec.remove_c2pa(&mut io::empty())?); + + // Write random bytes. + let random_bytes = RANDOM_JUMBF_BYTES1; + let mut codec = Codec::from_stream(&mut src)?; + let mut dst = Cursor::new(Vec::new()); + assert!(matches!(codec.write_c2pa(&mut dst, random_bytes), Ok(()))); + + // Remove the c2pa (it should exist). + let mut codec = Codec::from_stream(&mut dst)?; + let mut dst = Cursor::new(Vec::new()); + assert!(codec.remove_c2pa(&mut dst)?); + + // TODO: svg isn't cleaning up the entire c2pa block on remove! + // Ensure dst is back to src. + // assert_eq!(src.into_inner(), dst.into_inner()); + } + Ok(()) +} + +#[test] +fn test_c2pa_patch() -> Result<(), CodecError> { + for asset in ASSETS { + if !asset.supports_read_c2pa + || !asset.supports_patch_c2pa + || !asset.supports_write_c2pa + || !asset.supports_remove_c2pa + { + continue; + } + + let mut src = Cursor::new(asset.bytes); + + // Read the c2pa (none should exist). + let mut codec = Codec::from_stream(&mut src)?; + assert!(matches!(codec.read_c2pa(), Ok(None))); + + // Try to patch bytes (should not work). + let random_bytes = RANDOM_JUMBF_BYTES1; + let mut dst = Cursor::new(asset.bytes.to_owned()); + let mut codec = Codec::from_stream(&mut dst)?; + assert!(matches!( + codec.patch_c2pa(random_bytes), + Err(CodecError::NothingToPatch) + )); + + // Write random bytes. + let mut dst = Cursor::new(Vec::new()); + assert!(matches!(codec.write_c2pa(&mut dst, random_bytes), Ok(()))); + + // Patch bytes. + let random_bytes = RANDOM_JUMBF_BYTES2; + let mut codec = Codec::from_stream(&mut dst)?; + assert!(matches!(codec.patch_c2pa(random_bytes), Ok(()))); + + // Read the c2pa. + assert_eq!(codec.read_c2pa()?.as_deref(), Some(random_bytes)); + + // Patch bytes with incorrect size. 
+ let random_bytes = RANDOM_JUMBF_BYTES3; + assert!(matches!( + codec.patch_c2pa(random_bytes).unwrap_err(), + // We don't know what the expected/actual patch size is because they are + // based on the encoding of the individual file formats block. + CodecError::InvalidPatchSize { + expected: _, + actual: _ + } + )); + + // Remove the c2pa (it should exist). + let mut codec = Codec::from_stream(&mut dst)?; + let mut dst = Cursor::new(Vec::new()); + assert!(codec.remove_c2pa(&mut dst)?); + + // TODO: svg isn't cleaning up the entire c2pa block on remove! + // Ensure dst is back to src. + // assert_eq!(src.into_inner(), dst.into_inner()); + } + Ok(()) +} diff --git a/sdk/crates/c2pa-codecs/tests/common/mod.rs b/sdk/crates/c2pa-codecs/tests/common/mod.rs new file mode 100644 index 000000000..8e982cfef --- /dev/null +++ b/sdk/crates/c2pa-codecs/tests/common/mod.rs @@ -0,0 +1,232 @@ +// Instead of using random bytes we use random C2PA-box formatted JUMBF bytes. This is for compatibility with +// the JPEG codec, since it assumes the input is already JUMBF formatted (JUMBF is native to JPEG). 
+pub const RANDOM_JUMBF_BYTES1: &[u8] = &[ + // SuperBox + 0x00, 0x00, 0x00, 0x35, // LBox (total size 53 bytes) + 0x6a, 0x75, 0x6d, 0x62, // TBox ("jumb") + // DescriptionBox + 0x00, 0x00, 0x00, 0x19, // LBox (size 25 bytes) + 0x6a, 0x75, 0x6d, 0x64, // TBox ("jumd") + 0x63, 0x32, 0x70, 0x61, // Type ("c2pa" in ASCII) + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // Padding (8 bytes) + 0x00, 0x00, 0x00, 0x11, // Toggles + 0x6e, 0x75, 0x6d, 0x62, 0x65, 0x72, 0x5f, 0x63, 0x6f, 0x6e, 0x74, // Label ("number_cont") + // ContentBox + 0x00, 0x00, 0x00, 0x12, // LBox (size 18 bytes) + 0x6a, 0x73, 0x6f, 0x6e, // TBox ("json") + 0x5b, 0x30, 0x2c, 0x31, 0x2c, 0x32, 0x2c, 0x33, 0x2c, 0x34, // Payload Data [0,1,2,3,4] + 0x2c, 0x35, 0x2c, 0x36, 0x2c, 0x37, 0x2c, 0x38, 0x2c, 0x39, // Payload Data [5,6,7,8,9] + 0x5d, // Closing bracket for JSON array +]; +pub const RANDOM_JUMBF_BYTES2: &[u8] = &[ + // SuperBox + 0x00, 0x00, 0x00, 0x35, // LBox (total size 53 bytes) + 0x6a, 0x75, 0x6d, 0x62, // TBox ("jumb") + // DescriptionBox + 0x00, 0x00, 0x00, 0x19, // LBox (size 25 bytes) + 0x6a, 0x75, 0x6d, 0x64, // TBox ("jumd") + 0x63, 0x32, 0x70, 0x61, // Type ("c2pa" in ASCII) + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // Padding (8 bytes) + 0x00, 0x00, 0x00, 0x11, // Toggles + 0x6e, 0x75, 0x6d, 0x62, 0x65, 0x72, 0x5f, 0x63, 0x6f, 0x6e, 0x74, // Label ("number_cont") + // ContentBox + 0x00, 0x00, 0x00, 0x12, // LBox (size 18 bytes) + 0x6a, 0x73, 0x6f, 0x6e, // TBox ("json") + 0x5b, 0x39, 0x2c, 0x38, 0x2c, 0x37, 0x2c, 0x36, 0x2c, 0x35, // Payload Data [9,8,7,6,5] + 0x2c, 0x34, 0x2c, 0x33, 0x2c, 0x32, 0x2c, 0x31, 0x2c, 0x30, // Payload Data [4,3,2,1,0] + 0x5d, // Closing bracket for JSON array +]; +pub const RANDOM_JUMBF_BYTES3: &[u8] = &[ + // SuperBox + 0x00, 0x00, 0x00, 0x2d, // LBox (total size 45 bytes) + 0x6a, 0x75, 0x6d, 0x62, // TBox ("jumb") + // DescriptionBox + 0x00, 0x00, 0x00, 0x19, // LBox (size 25 bytes) + 0x6a, 0x75, 0x6d, 0x64, // TBox ("jumd") + 0x63, 0x32, 
0x70, 0x61, // Type ("c2pa" in ASCII) + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // Padding (8 bytes) + 0x00, 0x00, 0x00, 0x11, // Toggles + 0x6e, 0x75, 0x6d, 0x62, 0x65, 0x72, 0x5f, 0x63, 0x6f, 0x6e, 0x74, // Label ("number_cont") + // ContentBox + 0x00, 0x00, 0x00, 0x08, // LBox (size 8 bytes) + 0x6a, 0x73, 0x6f, 0x6e, // TBox ("json") + 0x5b, 0x31, 0x2c, 0x32, 0x2c, 0x33, 0x2c, 0x34, // Payload Data [1,2,3,4] + 0x5d, // Closing bracket for JSON array +]; + +pub const RANDOM_XMP: &str = r#" + + + + +"#; + +pub const ASSETS: &[Asset] = &[ + Asset::new( + AssetType::Gif, + include_bytes!("../../../../tests/fixtures/sample1.gif"), + ), + Asset::new( + AssetType::Svg, + include_bytes!("../../../../tests/fixtures/sample1.svg"), + ), + Asset::new( + AssetType::Jpeg, + include_bytes!("../../../../tests/fixtures/empty.jpg"), + ), +]; + +#[derive(Debug)] +pub enum AssetType { + Gif, + Svg, + Jpeg, +} + +#[derive(Debug)] +pub struct Asset { + pub bytes: &'static [u8], + pub extension: &'static str, + pub mime: &'static str, + pub max_signature_len: usize, + pub asset_type: AssetType, + + // TODO: Are the fields below ever needed in a non-testing scenario? Typically a user would attempt + // an operation and handle the CodecError::Unsupported from there. To avoid the boilerplate of + // that for every function in every test, we pre-define them here. Should we add this functionality + // directly to the codecs through the Support trait? 
+ // + pub supports_write_c2pa: bool, + pub supports_remove_c2pa: bool, + pub supports_write_xmp: bool, + pub supports_write_xmp_provenance: bool, + pub supports_remove_xmp: bool, + pub supports_remove_xmp_provenance: bool, + + pub supports_patch_c2pa: bool, + pub supports_read_c2pa: bool, + pub supports_read_xmp: bool, + pub supports_read_xmp_provenance: bool, + + pub supports_embeddable: bool, + pub supports_embed: bool, + + pub supports_span: bool, + pub supports_c2pa_span: bool, + pub supports_box_span: bool, + pub supports_bmff_span: bool, + pub supports_collection_span: bool, + + pub supports_supports_stream: bool, + pub supports_supports_extension: bool, + pub supports_supports_mime: bool, +} + +impl Asset { + pub const fn new(asset_type: AssetType, bytes: &'static [u8]) -> Self { + match asset_type { + AssetType::Gif => Asset { + bytes, + extension: "gif", + mime: "image/gif", + max_signature_len: 6, + asset_type, + + supports_write_c2pa: true, + supports_remove_c2pa: true, + supports_write_xmp: true, + supports_write_xmp_provenance: true, + supports_remove_xmp: true, + supports_remove_xmp_provenance: true, + + supports_patch_c2pa: true, + supports_read_c2pa: true, + supports_read_xmp: true, + supports_read_xmp_provenance: false, + + supports_embeddable: true, + supports_embed: true, + + supports_span: true, + supports_c2pa_span: true, + supports_box_span: true, + supports_bmff_span: true, + supports_collection_span: true, + + supports_supports_stream: true, + supports_supports_extension: true, + supports_supports_mime: true, + }, + AssetType::Svg => Asset { + bytes, + extension: "svg", + mime: "image/svg+xml", + max_signature_len: 0, + asset_type, + + supports_write_c2pa: true, + supports_remove_c2pa: true, + supports_write_xmp: false, + supports_write_xmp_provenance: false, + supports_remove_xmp: false, + supports_remove_xmp_provenance: false, + + supports_patch_c2pa: true, + supports_read_c2pa: true, + supports_read_xmp: false, + 
supports_read_xmp_provenance: false, + + supports_embeddable: true, + supports_embed: true, + + supports_span: true, + supports_c2pa_span: true, + supports_box_span: true, + supports_bmff_span: true, + supports_collection_span: true, + + supports_supports_stream: true, + supports_supports_extension: true, + supports_supports_mime: true, + }, + AssetType::Jpeg => Asset { + bytes, + extension: "jpg", + mime: "image/jpeg", + max_signature_len: 3, + asset_type, + + supports_write_c2pa: true, + supports_remove_c2pa: true, + supports_write_xmp: true, + supports_write_xmp_provenance: true, + supports_remove_xmp: true, + supports_remove_xmp_provenance: true, + + supports_patch_c2pa: false, + supports_read_c2pa: true, + supports_read_xmp: true, + supports_read_xmp_provenance: false, + + supports_embeddable: true, + supports_embed: true, + + supports_span: true, + supports_c2pa_span: true, + supports_box_span: true, + supports_bmff_span: true, + supports_collection_span: true, + + supports_supports_stream: true, + supports_supports_extension: true, + supports_supports_mime: true, + }, + } + } +} diff --git a/sdk/crates/c2pa-codecs/tests/embed.rs b/sdk/crates/c2pa-codecs/tests/embed.rs new file mode 100644 index 000000000..b865e29ec --- /dev/null +++ b/sdk/crates/c2pa-codecs/tests/embed.rs @@ -0,0 +1 @@ +// TODO: test embeddable and write embeddable w/ read/write c2pa as well diff --git a/sdk/crates/c2pa-codecs/tests/hashes.rs b/sdk/crates/c2pa-codecs/tests/hashes.rs new file mode 100644 index 000000000..f80de82d4 --- /dev/null +++ b/sdk/crates/c2pa-codecs/tests/hashes.rs @@ -0,0 +1,54 @@ +use c2pa_codecs::CodecError; +use common::ASSETS; + +mod common; + +// TODO: we are only testing that these things exist, not the exact property. Those must be tested within each codec itself. 
+ +#[test] +fn test_data_hash() -> Result<(), CodecError> { + for asset in ASSETS { + // TODO + } + Ok(()) +} + +#[test] +fn test_data_hash_placeholder() -> Result<(), CodecError> { + for asset in ASSETS { + // TODO + } + Ok(()) +} + +#[test] +fn test_box_hash() -> Result<(), CodecError> { + for asset in ASSETS { + // TODO + } + Ok(()) +} + +#[test] +fn test_box_hash_placeholder() -> Result<(), CodecError> { + for asset in ASSETS { + // TODO + } + Ok(()) +} + +#[test] +fn test_bmff_hash() -> Result<(), CodecError> { + for asset in ASSETS { + // TODO + } + Ok(()) +} + +#[test] +fn test_collection_hash() -> Result<(), CodecError> { + for asset in ASSETS { + // TODO + } + Ok(()) +} diff --git a/sdk/crates/c2pa-codecs/tests/suites.rs b/sdk/crates/c2pa-codecs/tests/suites.rs new file mode 100644 index 000000000..c90babac7 --- /dev/null +++ b/sdk/crates/c2pa-codecs/tests/suites.rs @@ -0,0 +1 @@ +// TODO: this runs integration tests over image test suites (like googles, pngsuite, etc.) diff --git a/sdk/crates/c2pa-codecs/tests/support.rs b/sdk/crates/c2pa-codecs/tests/support.rs new file mode 100644 index 000000000..dd7ee7d62 --- /dev/null +++ b/sdk/crates/c2pa-codecs/tests/support.rs @@ -0,0 +1,45 @@ +use std::io::Cursor; + +use c2pa_codecs::{Codec, CodecError, Support}; +use common::ASSETS; + +mod common; + +#[test] +fn test_supports_stream() -> Result<(), CodecError> { + for asset in ASSETS { + if !asset.supports_supports_stream { + continue; + } + + assert!(matches!( + Codec::supports_stream(&mut Cursor::new(asset.bytes)), + Ok(true) + )); + } + Ok(()) +} + +#[test] +fn test_supports_extension() -> Result<(), CodecError> { + for asset in ASSETS { + if !asset.supports_supports_extension { + continue; + } + + assert!(Codec::supports_extension(asset.extension)); + } + Ok(()) +} + +#[test] +fn test_supports_mime() -> Result<(), CodecError> { + for asset in ASSETS { + if !asset.supports_supports_mime { + continue; + } + + assert!(Codec::supports_mime(asset.mime)); + } + 
Ok(()) +} diff --git a/sdk/crates/c2pa-codecs/tests/xmp.rs b/sdk/crates/c2pa-codecs/tests/xmp.rs new file mode 100644 index 000000000..df8bf6342 --- /dev/null +++ b/sdk/crates/c2pa-codecs/tests/xmp.rs @@ -0,0 +1,89 @@ +use std::io::Cursor; + +use c2pa_codecs::{Codec, CodecError, Decode, Encode}; +use common::{ASSETS, RANDOM_XMP}; + +mod common; + +#[test] +fn test_xmp_read() -> Result<(), CodecError> { + for asset in ASSETS { + if !asset.supports_read_xmp { + continue; + } + + let mut src = Cursor::new(asset.bytes); + + let mut codec = Codec::from_stream(&mut src)?; + assert!(matches!(codec.read_xmp(), Ok(None))); + } + + Ok(()) +} + +#[test] +fn test_xmp_write() -> Result<(), CodecError> { + for asset in ASSETS { + if !asset.supports_read_xmp || !asset.supports_write_xmp { + continue; + } + + let mut src = Cursor::new(asset.bytes); + + let mut codec = Codec::from_stream(&mut src)?; + assert!(matches!(codec.read_xmp(), Ok(None))); + + let mut dst = Cursor::new(Vec::new()); + assert!(matches!(codec.write_xmp(&mut dst, RANDOM_XMP), Ok(()))); + + let mut codec = Codec::from_stream(&mut dst)?; + assert_eq!(codec.read_xmp()?, Some(RANDOM_XMP.to_string())); + } + + Ok(()) +} + +#[test] +fn test_xmp_write_provenance() -> Result<(), CodecError> { + for asset in ASSETS { + if !asset.supports_read_xmp + || !asset.supports_write_xmp_provenance + || !asset.supports_read_xmp_provenance + { + continue; + } + + let mut src = Cursor::new(asset.bytes); + + let mut codec = Codec::from_stream(&mut src)?; + assert!(matches!(codec.read_xmp(), Ok(None))); + + let random_xmp = "test"; + let mut dst = Cursor::new(Vec::new()); + assert!(matches!( + codec.write_xmp_provenance(&mut dst, random_xmp), + Ok(()) + )); + + let mut codec = Codec::from_stream(&mut dst)?; + assert_eq!(codec.read_xmp_provenance()?, Some(random_xmp.to_string())); + } + + Ok(()) +} + +#[test] +fn test_xmp_remove() -> Result<(), CodecError> { + for asset in ASSETS { + // TODO + } + Ok(()) +} + +#[test] +fn 
test_xmp_remove_provenance() -> Result<(), CodecError> { + for asset in ASSETS { + // TODO + } + Ok(()) +} diff --git a/sdk/tests/fixtures/empty.jpg b/sdk/tests/fixtures/empty.jpg new file mode 100644 index 000000000..0a5808990 Binary files /dev/null and b/sdk/tests/fixtures/empty.jpg differ diff --git a/sdk/tests/fixtures/test.svg b/sdk/tests/fixtures/test.svg new file mode 100644 index 000000000..77b513a3d --- /dev/null +++ b/sdk/tests/fixtures/test.svg @@ -0,0 +1,2 @@ +<"xml gbpuovh@%0-1( arahbqur srg-y4zfvtt:'QVD,8"?> +>>>>+ \ No newline at end of file