Skip to content

fix: xmp jpeg write #1156

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 8 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion sdk/src/asset_handlers/jpeg_io.rs
Original file line number Diff line number Diff line change
Expand Up @@ -620,7 +620,7 @@ impl RemoteRefEmbed for JpegIO {
.unwrap_or((None, MIN_XMP));

// add provenance and JPEG XMP prefix
let xmp = format!("{XMP_SIGNATURE}\0 {}", add_provenance(xmp, &manifest_uri)?);
let xmp = format!("{XMP_SIGNATURE}\0{}", add_provenance(xmp, &manifest_uri)?);
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This extra space caused there to be a leading space in the XMP data

let segment = JpegSegment::new_with_contents(markers::APP1, Bytes::from(xmp));
// insert or add the segment
match xmp_index {
Expand Down
120 changes: 114 additions & 6 deletions sdk/src/utils/xmp_inmemory_utils.rs
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
use crate::{asset_io::CAIRead, jumbf_io::get_cailoader_handler, Error, Result};

const RDF_DESCRIPTION: &[u8] = b"rdf:Description";
const XMP_END: &[u8] = b"<?xpacket end=\"w\"?>";

pub const MIN_XMP: &str = r#"<?xpacket begin="" id="W5M0MpCehiHzreSzNTczkc9d"?><x:xmpmeta xmlns:x="adobe:ns:meta/" x:xmptk="XMP Core 6.0.0"><rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"><rdf:Description rdf:about="" xmlns:xmp="http://ns.adobe.com/xap/1.0/" xmlns:xmpMM="http://ns.adobe.com/xap/1.0/mm/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:dcterms="http://purl.org/dc/terms/" xmpMM:DocumentID="xmp.did:cb9f5498-bb58-4572-8043-8c369e6bfb9b" xmpMM:InstanceID="xmp.iid:cb9f5498-bb58-4572-8043-8c369e6bfb9b"> </rdf:Description></rdf:RDF></x:xmpmeta><?xpacket end="w"?>"#;

Expand Down Expand Up @@ -90,19 +91,38 @@
}

// writes the event to the writer)
/// Add a value to XMP using a key, replaces the value if the key exists
/// Add a value to XMP using a key, replaces the value if the key exists, or adds it only once
fn add_xmp_key(xmp: &str, key: &str, value: &str) -> Result<String> {
let mut reader = Reader::from_str(xmp);
reader.config_mut().trim_text(true);
let mut writer = Writer::new_with_indent(Cursor::new(Vec::new()), b' ', 2);
let orig_length = xmp.len();

// Minimal padding should be 2 KB to 4 KB. This is used if no XMP packet end is found.
// If no packet end is found and the original length is less than 4096, the packet will be
// extended.
let mut target_length = orig_length.max(4096);
// Remove the ending xpacket if present for easier manipulation
let xpacket_end = "<?xpacket end";
let xpacket_end_length = XMP_END.len();
let xmp_body = if let Some(pos) = xmp.rfind(xpacket_end) {
println!("position {pos}");
target_length = orig_length - xpacket_end_length;
xmp[..pos].trim_end()
Copy link
Collaborator

@mauricefisher64 mauricefisher64 Jun 17, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The change I suggested about removes the else if case.

} else {
xmp
};

let mut reader = Reader::from_str(xmp_body);
// Do not trim text to preserve whitespace
reader.config_mut().trim_text(false);
reader.config_mut().expand_empty_elements = false;
let mut writer = Writer::new(Cursor::new(Vec::new()));
let mut added = false;
loop {
let event = reader
.read_event()
.map_err(|e| Error::XmpReadError(e.to_string()))?;
// println!("{:?}", event);
match event {
Event::Start(ref e) if e.name() == QName(RDF_DESCRIPTION) => {
Event::Start(ref e) if e.name() == QName(RDF_DESCRIPTION) && !added => {
// creates a new element
let mut elem = BytesStart::from_content(
String::from_utf8_lossy(RDF_DESCRIPTION),
Expand Down Expand Up @@ -130,13 +150,14 @@
if !added {
// didn't exist, so add it
elem.push_attribute((key, value));
added = true;
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The lack of added = true is why the remote manifests url was being added multiple times.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks for tracking this down!

}
// writes the event to the writer
writer
.write_event(Event::Start(elem))
.map_err(|e| Error::XmpWriteError(e.to_string()))?;
}
Event::Empty(ref e) if e.name() == QName(RDF_DESCRIPTION) => {
Event::Empty(ref e) if e.name() == QName(RDF_DESCRIPTION) && !added => {
// creates a new element
let mut elem = BytesStart::from_content(
String::from_utf8_lossy(RDF_DESCRIPTION),
Expand All @@ -163,6 +184,7 @@
if !added {
// didn't exist, so add it
elem.push_attribute((key, value));
added = true;

Check warning on line 187 in sdk/src/utils/xmp_inmemory_utils.rs

View check run for this annotation

Codecov / codecov/patch

sdk/src/utils/xmp_inmemory_utils.rs#L187

Added line #L187 was not covered by tests
}
// writes the event to the writer
writer
Expand All @@ -177,6 +199,9 @@
}
}
}
// Maintain XMP packet length with padding if possible.
let padding_length = target_length.saturating_sub(writer.get_ref().get_ref().len());
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Shouldn't this take into account the existing padding to keep the length the same if possible? This seems like it would not take into account the existing padding. Is there somewhere you account to the existing padding?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes. Line 107

target_length = orig_length - xpacket_end_length;

write_xmp_padding(&mut writer.get_mut(), padding_length)?;
let result = writer.into_inner().into_inner();
String::from_utf8(result).map_err(|e| Error::XmpWriteError(e.to_string()))
}
Expand All @@ -202,13 +227,46 @@
add_xmp_key(&xmp, "dcterms:provenance", provenance)
}

// According to the XMP Spec, the recommended practice is to use the ASCII space
// character (U+0020) in the appropriate encoding, with a newline about every 100 characters
fn write_xmp_padding<W: std::io::Write>(writer: &mut W, len: usize) -> std::io::Result<()> {
let mut remaining = len;
// Add newline before trailer no matter what.
writer.write_all(b"\n")?;
remaining = remaining.saturating_sub(1);

if remaining > 0 {
// Account for final newline since we are adding whitespace.
remaining -= 1;
while remaining > 0 {
let chunk = remaining.min(99);
writer.write_all(&vec![b' '; chunk])?;
remaining -= chunk;
if remaining > 0 {
writer.write_all(b"\n")?;
remaining -= 1;
}
}
writer.write_all(b"\n")?;
}
writer.write_all(XMP_END)?;
Ok(())
}

#[cfg(test)]
mod tests {
#![allow(clippy::expect_used)]
#![allow(clippy::unwrap_used)]

//use env_logger;
#[cfg(all(target_arch = "wasm32", not(target_os = "wasi")))]
use wasm_bindgen_test::wasm_bindgen_test;

use super::*;
use crate::{
asset_handlers::jpeg_io::JpegIO,
asset_io::{AssetIO, RemoteRefEmbedType},
};

const XMP_DATA: &str = r#"<?xpacket begin="" id="W5M0MpCehiHzreSzNTczkc9d"?>
<x:xmpmeta xmlns:x="adobe:ns:meta/" x:xmptk="contentauth">
Expand Down Expand Up @@ -261,4 +319,54 @@
println!("{xmp}");
assert_eq!(unicorn, Some(PROVENANCE.to_string()));
}

#[cfg_attr(not(target_arch = "wasm32"), actix::test)]
#[cfg_attr(
all(target_arch = "wasm32", not(target_os = "wasi")),
wasm_bindgen_test
)]
#[cfg_attr(target_os = "wasi", wstd::test)]
async fn test_broken_xmp_read_write_stream() {
let source_bytes = include_bytes!("../../tests/fixtures/IMG_0003.jpg");
let test_msg = "https://cai-manifests-stage.adobe.com/manifests/urn-c2pa-0ab6e8b8-5c28-4ef1-8f58-86c21f0349bf-adobe";

let handler = JpegIO::new("");

let assetio_handler = handler.get_handler("jpg");

let remote_ref_handler = assetio_handler.remote_ref_writer_ref().unwrap();

let mut source_stream = Cursor::new(source_bytes.to_vec());
let mut output_stream = Cursor::new(Vec::new());

let original_xmp_length = assetio_handler
.get_reader()
.read_xmp(&mut source_stream)
.unwrap()
.len();
source_stream.set_position(0);

remote_ref_handler
.embed_reference_to_stream(
&mut source_stream,
&mut output_stream,
RemoteRefEmbedType::Xmp(test_msg.to_string()),
)
.unwrap();

output_stream.set_position(0);

// read back in XMP
let read_xmp = assetio_handler
.get_reader()
.read_xmp(&mut output_stream)
.unwrap();

output_stream.set_position(0);

//std::fs::write("../target/xmp_write.jpg", output_stream.into_inner()).unwrap();

assert!(read_xmp.contains(test_msg));
assert_eq!(read_xmp.len(), original_xmp_length);
}
}
Loading