-
Notifications
You must be signed in to change notification settings - Fork 84
fix: xmp jpeg write #1156
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
fix: xmp jpeg write #1156
Changes from all commits
2103b8e
c14e075
4794309
c121378
3037059
e9bf11e
36cc914
c3710b7
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -23,6 +23,7 @@ | |
use crate::{asset_io::CAIRead, jumbf_io::get_cailoader_handler, Error, Result}; | ||
|
||
const RDF_DESCRIPTION: &[u8] = b"rdf:Description"; | ||
const XMP_END: &[u8] = b"<?xpacket end=\"w\"?>"; | ||
|
||
pub const MIN_XMP: &str = r#"<?xpacket begin="" id="W5M0MpCehiHzreSzNTczkc9d"?><x:xmpmeta xmlns:x="adobe:ns:meta/" x:xmptk="XMP Core 6.0.0"><rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"><rdf:Description rdf:about="" xmlns:xmp="http://ns.adobe.com/xap/1.0/" xmlns:xmpMM="http://ns.adobe.com/xap/1.0/mm/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:dcterms="http://purl.org/dc/terms/" xmpMM:DocumentID="xmp.did:cb9f5498-bb58-4572-8043-8c369e6bfb9b" xmpMM:InstanceID="xmp.iid:cb9f5498-bb58-4572-8043-8c369e6bfb9b"> </rdf:Description></rdf:RDF></x:xmpmeta><?xpacket end="w"?>"#; | ||
|
||
|
@@ -90,19 +91,38 @@ | |
} | ||
|
||
// writes the event to the writer) | ||
/// Add a value to XMP using a key, replaces the value if the key exists | ||
/// Add a value to XMP using a key, replaces the value if the key exists, or adds it only once | ||
fn add_xmp_key(xmp: &str, key: &str, value: &str) -> Result<String> { | ||
let mut reader = Reader::from_str(xmp); | ||
reader.config_mut().trim_text(true); | ||
let mut writer = Writer::new_with_indent(Cursor::new(Vec::new()), b' ', 2); | ||
let orig_length = xmp.len(); | ||
|
||
// Minimal padding should be 2 KB to 4 KB. This is used if no XMP packet end is found. | ||
// If no packet end is found and the original length is less than 4096, the packet will be | ||
// extended. | ||
let mut target_length = orig_length.max(4096); | ||
// Remove the ending xpacket if present for easier manipulation | ||
let xpacket_end = "<?xpacket end"; | ||
let xpacket_end_length = XMP_END.len(); | ||
let xmp_body = if let Some(pos) = xmp.rfind(xpacket_end) { | ||
println!("position {pos}"); | ||
target_length = orig_length - xpacket_end_length; | ||
xmp[..pos].trim_end() | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The change I suggested about removes the else if case. |
||
} else { | ||
xmp | ||
}; | ||
|
||
let mut reader = Reader::from_str(xmp_body); | ||
// Do not trim text to preserve whitespace | ||
reader.config_mut().trim_text(false); | ||
reader.config_mut().expand_empty_elements = false; | ||
let mut writer = Writer::new(Cursor::new(Vec::new())); | ||
let mut added = false; | ||
loop { | ||
let event = reader | ||
.read_event() | ||
.map_err(|e| Error::XmpReadError(e.to_string()))?; | ||
// println!("{:?}", event); | ||
match event { | ||
Event::Start(ref e) if e.name() == QName(RDF_DESCRIPTION) => { | ||
Event::Start(ref e) if e.name() == QName(RDF_DESCRIPTION) && !added => { | ||
// creates a new element | ||
let mut elem = BytesStart::from_content( | ||
String::from_utf8_lossy(RDF_DESCRIPTION), | ||
|
@@ -130,13 +150,14 @@ | |
if !added { | ||
// didn't exist, so add it | ||
elem.push_attribute((key, value)); | ||
added = true; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The lack of There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Thanks for tracking this down! |
||
} | ||
// writes the event to the writer | ||
writer | ||
.write_event(Event::Start(elem)) | ||
.map_err(|e| Error::XmpWriteError(e.to_string()))?; | ||
} | ||
Event::Empty(ref e) if e.name() == QName(RDF_DESCRIPTION) => { | ||
Event::Empty(ref e) if e.name() == QName(RDF_DESCRIPTION) && !added => { | ||
// creates a new element | ||
let mut elem = BytesStart::from_content( | ||
String::from_utf8_lossy(RDF_DESCRIPTION), | ||
|
@@ -163,6 +184,7 @@ | |
if !added { | ||
// didn't exist, so add it | ||
elem.push_attribute((key, value)); | ||
added = true; | ||
} | ||
// writes the event to the writer | ||
writer | ||
|
@@ -177,6 +199,9 @@ | |
} | ||
} | ||
} | ||
// Maintain XMP packet length with padding if possible. | ||
let padding_length = target_length.saturating_sub(writer.get_ref().get_ref().len()); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Shouldn't this take into account the existing padding to keep the length the same if possible? This seems like it would not take into account the existing padding. Is there somewhere you account to the existing padding? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yes. Line 107
|
||
write_xmp_padding(&mut writer.get_mut(), padding_length)?; | ||
let result = writer.into_inner().into_inner(); | ||
String::from_utf8(result).map_err(|e| Error::XmpWriteError(e.to_string())) | ||
} | ||
|
@@ -202,13 +227,46 @@ | |
add_xmp_key(&xmp, "dcterms:provenance", provenance) | ||
} | ||
|
||
// According to the XMP Spec, the recommended practice is to use the ASCII space | ||
// character (U+0020) in the appropriate encoding, with a newline about every 100 characters | ||
fn write_xmp_padding<W: std::io::Write>(writer: &mut W, len: usize) -> std::io::Result<()> { | ||
let mut remaining = len; | ||
// Add newline before trailer no matter what. | ||
writer.write_all(b"\n")?; | ||
remaining = remaining.saturating_sub(1); | ||
|
||
if remaining > 0 { | ||
// Account for final newline since we are adding whitespace. | ||
remaining -= 1; | ||
while remaining > 0 { | ||
let chunk = remaining.min(99); | ||
writer.write_all(&vec![b' '; chunk])?; | ||
remaining -= chunk; | ||
if remaining > 0 { | ||
writer.write_all(b"\n")?; | ||
remaining -= 1; | ||
} | ||
} | ||
writer.write_all(b"\n")?; | ||
} | ||
writer.write_all(XMP_END)?; | ||
Ok(()) | ||
} | ||
|
||
#[cfg(test)] | ||
mod tests { | ||
#![allow(clippy::expect_used)] | ||
#![allow(clippy::unwrap_used)] | ||
|
||
//use env_logger; | ||
#[cfg(all(target_arch = "wasm32", not(target_os = "wasi")))] | ||
use wasm_bindgen_test::wasm_bindgen_test; | ||
|
||
use super::*; | ||
use crate::{ | ||
asset_handlers::jpeg_io::JpegIO, | ||
asset_io::{AssetIO, RemoteRefEmbedType}, | ||
}; | ||
|
||
const XMP_DATA: &str = r#"<?xpacket begin="" id="W5M0MpCehiHzreSzNTczkc9d"?> | ||
<x:xmpmeta xmlns:x="adobe:ns:meta/" x:xmptk="contentauth"> | ||
|
@@ -261,4 +319,54 @@ | |
println!("{xmp}"); | ||
assert_eq!(unicorn, Some(PROVENANCE.to_string())); | ||
} | ||
|
||
#[cfg_attr(not(target_arch = "wasm32"), actix::test)] | ||
#[cfg_attr( | ||
all(target_arch = "wasm32", not(target_os = "wasi")), | ||
wasm_bindgen_test | ||
)] | ||
#[cfg_attr(target_os = "wasi", wstd::test)] | ||
async fn test_broken_xmp_read_write_stream() { | ||
let source_bytes = include_bytes!("../../tests/fixtures/IMG_0003.jpg"); | ||
let test_msg = "https://cai-manifests-stage.adobe.com/manifests/urn-c2pa-0ab6e8b8-5c28-4ef1-8f58-86c21f0349bf-adobe"; | ||
|
||
let handler = JpegIO::new(""); | ||
|
||
let assetio_handler = handler.get_handler("jpg"); | ||
|
||
let remote_ref_handler = assetio_handler.remote_ref_writer_ref().unwrap(); | ||
|
||
let mut source_stream = Cursor::new(source_bytes.to_vec()); | ||
let mut output_stream = Cursor::new(Vec::new()); | ||
|
||
let original_xmp_length = assetio_handler | ||
.get_reader() | ||
.read_xmp(&mut source_stream) | ||
.unwrap() | ||
.len(); | ||
source_stream.set_position(0); | ||
|
||
remote_ref_handler | ||
.embed_reference_to_stream( | ||
&mut source_stream, | ||
&mut output_stream, | ||
RemoteRefEmbedType::Xmp(test_msg.to_string()), | ||
) | ||
.unwrap(); | ||
|
||
output_stream.set_position(0); | ||
|
||
// read back in XMP | ||
let read_xmp = assetio_handler | ||
.get_reader() | ||
.read_xmp(&mut output_stream) | ||
.unwrap(); | ||
|
||
output_stream.set_position(0); | ||
|
||
//std::fs::write("../target/xmp_write.jpg", output_stream.into_inner()).unwrap(); | ||
|
||
assert!(read_xmp.contains(test_msg)); | ||
assert_eq!(read_xmp.len(), original_xmp_length); | ||
} | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This extra space caused there to be a leading space in the XMP data