Add generation metadata to saved pngs from the generated thumbnails (from right-click or ... menu on thumbnails) (#1942)

sfisher · web-flow · commit 0977a7228f34 · 2025-08-26T23:19:44.000+02:00
* adding generation data to PNGs without adding a binary dependency on Pillow, which is difficult to satisfy on all platforms
* fixing circular imports caused by type-hinting Job
* interim commit for choice to save metadata
* working again after refactoring
* Fixing ruff problems
* updating tests
* Fixing tests and cleaning up code
* remove commented out lines no longer needed
* may make writing png slightly faster
* adding changes for naming, stray print, translations
* remove some comments for linting exceptions caused by circular imports
* fixing tests and formatting
* fixing types for tests of test_text
* ruff reformat
diff --git a/ai_diffusion/image.py b/ai_diffusion/image.py
@@ -10,6 +10,9 @@
 from .settings import settings, ImageFileFormat
 from .util import clamp, ensure, is_linux, client_logger as log
 
+import struct
+import zlib
+
 
 def multiple_of(number, multiple):
     """Round up to the nearest multiple of a number."""
@@ -399,6 +402,59 @@ def _mask_op(lhs: "Image", rhs: "Image", mode: QPainter.CompositionMode):
         result.reinterpretAsFormat(QImage.Format.Format_Grayscale8)
         return Image(result)
 
+    @staticmethod
+    def save_png_w_itxt(img_path: Union[str, Path], png_data: bytes, keyword: str, text: str):
+        """Write `png_data` to `img_path` with an iTXt text chunk inserted after IHDR.
+
+        `png_data` is a complete encoded PNG. The chunk stores `text` uncompressed as
+        UTF-8 under `keyword`, with empty language tag and translated keyword. All
+        other bytes are copied unchanged. Input is validated before the file is
+        opened, so a failed call does not truncate an existing file.
+
+        Raises ValueError if `png_data` does not start with the PNG signature
+        followed by a complete IHDR chunk, or if `keyword` is not 1-79 Latin-1
+        bytes without NUL.
+        """
+        signature = b"\x89PNG\r\n\x1a\n"
+        if png_data[:8] != signature:
+            raise ValueError("Not a valid PNG file")
+
+        # IHDR must be the first chunk: 4-byte length, 4-byte type, data, 4-byte CRC.
+        ihdr_start = len(signature)
+        if len(png_data) < ihdr_start + 8:
+            raise ValueError("Not a valid PNG file")
+        (ihdr_length,) = struct.unpack_from(">I", png_data, ihdr_start)
+        ihdr_end = ihdr_start + 12 + ihdr_length
+        if png_data[ihdr_start + 4 : ihdr_start + 8] != b"IHDR" or ihdr_end > len(png_data):
+            raise ValueError("Not a valid PNG file")
+
+        keyword_bytes = keyword.encode("latin1")
+        if not 1 <= len(keyword_bytes) <= 79 or b"\x00" in keyword_bytes:
+            raise ValueError("PNG keyword must be 1-79 Latin-1 characters without NUL")
+
+        itxt_data = b"".join(
+            (
+                keyword_bytes,
+                b"\x00",  # keyword terminator
+                b"\x00",  # compression flag: 0 (not compressed)
+                b"\x00",  # compression method: 0
+                b"\x00",  # language tag: empty
+                b"\x00",  # translated keyword: empty
+                text.encode("utf-8"),
+            )
+        )
+        itxt_chunk = b"".join(
+            (
+                struct.pack(">I", len(itxt_data)),
+                b"iTXt",
+                itxt_data,
+                struct.pack(">I", zlib.crc32(b"iTXt" + itxt_data) & 0xFFFFFFFF),
+            )
+        )
+
+        with open(img_path, "wb") as f:
+            f.write(png_data[:ihdr_end] + itxt_chunk + png_data[ihdr_end:])
+
     @classmethod
     def mask_subtract(cls, lhs: "Image", rhs: "Image"):
         return cls._mask_op(rhs, lhs, QPainter.CompositionMode.CompositionMode_SourceOut)
@@ -535,6 +582,16 @@ def save(self, filepath: Union[str, Path]):
         finally:
             file.close()
 
+    def save_png_with_metadata(self, filepath: Union[str, Path], metadata_text: str):
+        """Encode this image as PNG and write it to `filepath` with embedded metadata.
+
+        `metadata_text` is passed to `save_png_w_itxt` under the keyword "parameters".
+        """
+        encoded = bytes(self.to_bytes(ImageFileFormat.png))
+        self.save_png_w_itxt(
+            img_path=filepath, png_data=encoded, keyword="parameters", text=metadata_text
+        )
+
     def debug_save(self, name):
         if settings.debug_image_folder:
             self.save(Path(settings.debug_image_folder, f"{name}.png"))
diff --git a/ai_diffusion/model.py b/ai_diffusion/model.py
@@ -35,6 +35,7 @@
 from .region import Region, RegionLink, RootRegion, process_regions, get_region_inpaint_mask
 from .resources import ControlMode
 from .resolution import compute_bounds, compute_relative_bounds
+from .text import create_img_metadata
 
 
 class QueueMode(Enum):
@@ -1391,4 +1392,9 @@ def _save_job_result(model: Model, job: Job | None, index: int):
     base_image = model._get_current_image(Bounds(0, 0, *model.document.extent))
     result_image = job.results[index]
     base_image.draw_image(result_image, job.params.bounds.offset)
-    base_image.save(path)
+
+    if settings.save_image_metadata:
+        metadata_text = create_img_metadata(job.params)
+        base_image.save_png_with_metadata(filepath=path, metadata_text=metadata_text)
+    else:
+        base_image.save(path)
diff --git a/ai_diffusion/settings.py b/ai_diffusion/settings.py
@@ -215,6 +215,13 @@ class Settings(QObject):
         _("Translate text prompts from the selected language to English"),
     )
 
+    save_image_metadata: bool
+    _save_image_metadata = Setting(
+        _("Save Image Metadata"),
+        False,
+        _("When saving generated images from thumbnails, include metadata in the PNG"),
+    )
+
     prompt_line_count: int
     _prompt_line_count = Setting(
         _("Prompt Line Count"), 2, _("Size of the text editor for image descriptions")
diff --git a/ai_diffusion/text.py b/ai_diffusion/text.py
@@ -7,6 +7,7 @@
 from .files import FileCollection, FileSource
 from .localization import translate as _
 from .util import client_logger as log
+from .jobs import JobParams
 
 
 class LoraId(NamedTuple):
@@ -219,3 +220,56 @@ def edit_attention(text: str, positive: bool) -> str:
         if weight == 1.0 and open_bracket == "("
         else f"{open_bracket}{attention_string}:{weight:.1f}{close_bracket}"
     )
+
+
+def create_img_metadata(params: JobParams) -> str:
+    """Build the text metadata embedded in saved PNG files, in Automatic1111 style.
+
+    The result has three lines: the prompt (with a `<lora:name:weight>` tag for each
+    named LoRA of non-zero weight), the negative prompt, and a comma-separated
+    settings line. Entries that are missing from `params.metadata` or set to None
+    fall back to an empty string, or to "Unknown" for the checkpoint.
+    """
+    meta = params.metadata
+    prompt = meta.get("prompt") or ""
+    neg_prompt = meta.get("negative_prompt") or ""
+    sampler_info = meta.get("sampler") or ""
+    model = meta.get("checkpoint") or "Unknown"
+    strength = meta.get("strength")
+
+    # Split a sampler entry such as "Euler - euler_a (20 / 7.5)" into sampler name,
+    # steps and CFG scale. Any other text is passed through as the sampler name.
+    match = re.match(r".*?-\s*(.+?)\s*\((\d+)\s*/\s*([\d.]+)\)", sampler_info)
+    if match:
+        sampler, steps, cfg_scale = match.groups()
+    else:
+        sampler, steps, cfg_scale = sampler_info, "Unknown", "Unknown"
+
+    # LoRA entries may be dicts or (name, weight) sequences. Other shapes, entries
+    # without a name and entries with zero weight are left out.
+    lora_tags = []
+    for lora in meta.get("loras") or []:
+        if isinstance(lora, dict):
+            name, weight = lora.get("name"), lora.get("weight", 0.0)
+        elif isinstance(lora, (list, tuple)) and len(lora) >= 2:
+            name, weight = lora[0], lora[1]
+        else:
+            continue
+        if name and weight != 0:
+            lora_tags.append(f" <lora:{name}:{weight}>")
+
+    full_prompt = prompt.strip() + "".join(lora_tags)
+    settings_line = (
+        f"Steps: {steps}, Sampler: {sampler}, CFG scale: {cfg_scale}, Seed: {params.seed}, "
+        f"Size: {params.bounds.width}x{params.bounds.height}, Model hash: unknown, Model: {model}"
+    )
+    if strength is not None and strength != 1.0:
+        settings_line += f", Denoising strength: {strength}"
+
+    return "\n".join(
+        (
+            f"Prompt: {full_prompt}",
+            f"Negative prompt: {neg_prompt}",
+            settings_line,
+        )
+    )
diff --git a/ai_diffusion/ui/settings.py b/ai_diffusion/ui/settings.py
@@ -485,6 +485,7 @@ def __init__(self):
         )
         self.add("new_seed_after_apply", SwitchSetting(S._new_seed_after_apply, parent=self))
         self.add("debug_dump_workflow", SwitchSetting(S._debug_dump_workflow, parent=self))
+        self.add("save_image_metadata", SwitchSetting(S._save_image_metadata, parent=self))
 
         languages = [(lang.name, lang.id) for lang in Localization.available]
         self._widgets["language"].set_items(languages)
diff --git a/tests/test_image.py b/tests/test_image.py
@@ -1,5 +1,8 @@
 import pytest
 import numpy as np
+import struct
+import zlib
+
 from PyQt5.QtGui import QImage, qRgba
 from PyQt5.QtCore import Qt, QByteArray
 from PIL import Image as PILImage
@@ -344,3 +347,58 @@ def test_downscale():
     img = create_test_image(12, 8)
     result = Image.scale(img, Extent(6, 4))
     assert result.width == 6 and result.height == 4
+
+
+def test_save_png_w_itxt_valid(tmp_path):
+    # Minimal PNG structure: signature, IHDR (1x1, 8-bit RGB) and IEND
+    png_header = b"\x89PNG\r\n\x1a\n"
+    ihdr_data = b"\x00\x00\x00\x01" + b"\x00\x00\x00\x01" + b"\x08\x02\x00\x00\x00"
+    ihdr_chunk = (
+        struct.pack(">I", len(ihdr_data))
+        + b"IHDR"
+        + ihdr_data
+        + struct.pack(">I", zlib.crc32(b"IHDR" + ihdr_data) & 0xFFFFFFFF)
+    )
+    iend_chunk = b"\x00\x00\x00\x00IEND" + struct.pack(">I", zlib.crc32(b"IEND") & 0xFFFFFFFF)
+    file_path = tmp_path / "test_image.png"
+
+    Image.save_png_w_itxt(
+        img_path=file_path,
+        png_data=png_header + ihdr_chunk + iend_chunk,
+        keyword="testkey",
+        text="testvalue",
+    )
+
+    # The iTXt chunk must sit directly after IHDR, all other bytes unchanged
+    itxt_data = b"testkey" + b"\x00" * 5 + b"testvalue"
+    itxt_chunk = (
+        struct.pack(">I", len(itxt_data))
+        + b"iTXt"
+        + itxt_data
+        + struct.pack(">I", zlib.crc32(b"iTXt" + itxt_data) & 0xFFFFFFFF)
+    )
+    assert file_path.read_bytes() == png_header + ihdr_chunk + itxt_chunk + iend_chunk
+
+
+def test_save_png_w_itxt_invalid(tmp_path):
+    file_path = tmp_path / "not_png.txt"
+
+    with pytest.raises(ValueError, match="Not a valid PNG file"):
+        Image.save_png_w_itxt(
+            img_path=file_path, png_data=b"not a png", keyword="key", text="value"
+        )
+    # Invalid input is rejected before the output file is created
+    assert not file_path.exists()
+
+
+def test_save_png_with_metadata(tmp_path):
+    img = Image.create(Extent(2, 2), Qt.GlobalColor.red)
+    file_path = tmp_path / "test_meta.png"
+
+    img.save_png_with_metadata(file_path, "my test metadata in the png")
+
+    data = file_path.read_bytes()
+    assert data.startswith(b"\x89PNG\r\n\x1a\n")
+    assert b"iTXt" in data
+    assert b"parameters\x00" in data
+    assert b"my test metadata in the png" in data
diff --git a/tests/test_text.py b/tests/test_text.py
@@ -1,6 +1,14 @@
-from ai_diffusion.text import merge_prompt, extract_loras, edit_attention, select_on_cursor_pos
+from ai_diffusion.text import (
+    merge_prompt,
+    extract_loras,
+    edit_attention,
+    select_on_cursor_pos,
+    create_img_metadata,
+)
 from ai_diffusion.api import LoraInput
 from ai_diffusion.files import File, FileCollection
+from ai_diffusion.jobs import JobParams
+from ai_diffusion.image import Bounds
 
 
 def test_merge_prompt():
@@ -84,6 +92,94 @@ def test_extract_loras_meta():
     )
 
 
+def _job_params(metadata: dict, width: int = 64, height: int = 64, seed: int = 12345):
+    """Create JobParams for a `width` x `height` area with the given metadata."""
+    bounds = Bounds(0, 0, width, height)
+    return JobParams(bounds=bounds, name="test", metadata=metadata, seed=seed)
+
+
+def test_create_img_metadata_basic():
+    metadata = {
+        "prompt": "A cat",
+        "negative_prompt": "dog",
+        "sampler": "Euler - euler_a (20 / 7.5)",
+        "checkpoint": "model.ckpt",
+        "strength": 0.8,
+        "loras": [],
+    }
+
+    result = create_img_metadata(_job_params(metadata, width=512, height=768))
+
+    assert "Prompt: A cat" in result
+    assert "Negative prompt: dog" in result
+    expected_settings = (
+        "Steps: 20, Sampler: euler_a, CFG scale: 7.5, Seed: 12345, Size: 512x768, "
+        "Model hash: unknown, Model: model.ckpt, Denoising strength: 0.8"
+    )
+    assert expected_settings in result
+
+
+def test_create_img_metadata_sampler_unmatched():
+    metadata = {
+        "prompt": "Test",
+        "negative_prompt": "",
+        "sampler": "UnknownSampler",
+        "checkpoint": "unknown.ckpt",
+        "loras": [],
+    }
+
+    result = create_img_metadata(_job_params(metadata, width=256, height=256))
+
+    assert "Sampler: UnknownSampler" in result
+    assert "Steps: Unknown" in result
+    assert "CFG scale: Unknown" in result
+
+
+def test_create_img_metadata_loras_dict_and_tuple():
+    metadata = {
+        "prompt": "Prompt",
+        "negative_prompt": "",
+        "sampler": "Euler - euler_a (10 / 5.0)",
+        "checkpoint": "loramodel.ckpt",
+        "loras": [{"name": "lora1", "weight": 0.7}, ("lora2", 0.5), ["lora3", 0.9]],
+    }
+
+    result = create_img_metadata(_job_params(metadata, width=128, height=128, seed=0))
+
+    for tag in ("<lora:lora1:0.7>", "<lora:lora2:0.5>", "<lora:lora3:0.9>"):
+        assert tag in result
+
+
+def test_create_img_metadata_strength_none_and_one():
+    # A strength of None or 1.0 must not add a "Denoising strength" entry
+    for strength in (None, 1.0):
+        metadata = {
+            "prompt": "Prompt",
+            "negative_prompt": "",
+            "sampler": "Euler - euler_a (5 / 2.0)",
+            "checkpoint": "model.ckpt",
+            "strength": strength,
+            "loras": [],
+        }
+        assert "Denoising strength" not in create_img_metadata(_job_params(metadata))
+
+
+def test_create_img_metadata_missing_metadata_fields():
+    result = create_img_metadata(_job_params({}, width=100, height=200, seed=999))
+
+    for expected in (
+        "Prompt: ",
+        "Negative prompt: ",
+        "Steps: Unknown",
+        "Sampler: ",
+        "CFG scale: Unknown",
+        "Seed: 999",
+        "Size: 100x200",
+        "Model: Unknown",
+    ):
+        assert expected in result
+
+
 class TestEditAttention:
     def test_empty_selection(self):
         assert edit_attention("", positive=True) == ""

Original file line number	Diff line number	Diff line change
`@@ -485,6 +485,7 @@ def __init__(self):`
`485`	`485`	`)`
`486`	`486`	`self.add("new_seed_after_apply", SwitchSetting(S._new_seed_after_apply, parent=self))`
`487`	`487`	`self.add("debug_dump_workflow", SwitchSetting(S._debug_dump_workflow, parent=self))`
	`488`	`+ self.add("save_image_metadata", SwitchSetting(S._save_image_metadata, parent=self))`
`488`	`489`
`489`	`490`	`languages = [(lang.name, lang.id) for lang in Localization.available]`
`490`	`491`	`self._widgets["language"].set_items(languages)`