Skip to content

Commit db064d7

Browse files
committed
OSDOCS 13186 modify build_for_portal.py to detect images and not duplicate the entire image directory for every book
1 parent 0eca412 commit db064d7

File tree

1 file changed

+42
-17
lines changed

1 file changed

+42
-17
lines changed

build_for_portal.py

Lines changed: 42 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -455,32 +455,39 @@ def reformat_for_drupal(info):
455455

456456
ensure_directory(images_dir)
457457

458+
# ADDED 21 Jan 2025: selective processing of images
459+
# the set of file names is to be stored in image_files
460+
# NOTE: the set currently starts empty; images defined in attributes (to copy every time) are not pre-seeded here
461+
image_files = set()
462+
458463
log.debug("Copying source files for " + book["Name"])
459-
copy_files(book, book_src_dir, src_dir, dest_dir, info)
464+
copy_files(book, book_src_dir, src_dir, dest_dir, info, image_files)
460465

461466
log.debug("Copying images for " + book["Name"])
462-
copy_images(book, src_dir, images_dir, distro)
467+
copy_images(book, src_dir, images_dir, distro, image_files)
463468

464469

465470

466-
def copy_images(node, src_path, dest_dir, distro):
471+
def copy_images(node, src_path, dest_dir, distro, image_files):
    """
    Copy the referenced images into the (flat) destination images directory.

    REWORKED 21 Jan 2025: we now assume that there is a single images directory and
    that all other images subdirectories are simply symlinks into it. So we do not
    iterate over the tree but simply copy the necessary files from that one images directory.

    Parameters:
        node: unused; kept so that existing callers do not need to change.
        src_path: directory that contains the single ``images`` directory.
        dest_dir: existing directory that receives the copied image files.
        distro: unused; kept so that existing callers do not need to change.
        image_files: iterable of image file names (base names, no directories)
            to copy from ``<src_path>/images``.

    Names that do not resolve to a regular file are skipped silently.
    """
    images_source_dir = os.path.join(src_path, "images")

    for image_file_name in image_files:
        image_file_pathname = os.path.join(images_source_dir, image_file_name)

        # isfile() rather than exists(): an empty name resolves to the images
        # directory itself, and a name may match a subdirectory; both "exist",
        # but shutil.copy() would raise on them.
        if not os.path.isfile(image_file_pathname):
            # if an image file is not found, this is not an error, because it might
            # have been picked up from a commented-out line. Actual missing images
            # should be caught by the asciidoctor/asciibinder part of CI
            continue

        shutil.copy(image_file_pathname, dest_dir)
481488

482489

483-
def copy_files(node, book_src_dir, src_dir, dest_dir, info):
490+
def copy_files(node, book_src_dir, src_dir, dest_dir, info, image_files):
484491
"""
485492
Recursively copy files from the source directory to the destination directory, making sure to scrub the content, add IDs where the
486493
content is referenced elsewhere and fix any links that should be cross references.
@@ -498,7 +505,7 @@ def topic_callback(topic_node, parent_dir, depth):
498505
dest_file = os.path.join(node_dest_dir, topic_node["File"] + ".adoc")
499506

500507
# Copy the file
501-
copy_file(info, book_src_dir, src_file, dest_dir, dest_file)
508+
copy_file(info, book_src_dir, src_file, dest_dir, dest_file, image_files)
502509

503510
iter_tree(node, info["distro"], dir_callback, topic_callback)
504511

@@ -509,6 +516,7 @@ def copy_file(
509516
src_file,
510517
dest_dir,
511518
dest_file,
519+
image_files,
512520
include_check=True,
513521
tag=None,
514522
cwd=None,
@@ -529,7 +537,7 @@ def copy_file(
529537
# os.mknod(dest_file)
530538
open(dest_file, "w").close()
531539
# Scrub/fix the content
532-
content = scrub_file(info, book_src_dir, src_file, tag=tag, cwd=cwd)
540+
content = scrub_file(info, book_src_dir, src_file, image_files, tag=tag, cwd=cwd)
533541

534542
# Check for any includes
535543
if include_check:
@@ -584,6 +592,7 @@ def copy_file(
584592
include_file,
585593
dest_dir,
586594
dest_include_file,
595+
image_files,
587596
tag=include_tag,
588597
cwd=current_dir,
589598
)
@@ -612,8 +621,21 @@ def copy_file(
612621
with open(dest_file, "w") as f:
613622
f.write(content)
614623

624+
# Matches AsciiDoc block (image::target[attrs]) and inline (image:target[attrs])
# image macros; group 1 is the target. Compiled once rather than on every call.
_IMAGE_MACRO_RE = re.compile(r'image::?([^\s\[]+)\[.*?\]')


def detect_images(content, image_files):
    """
    Detect all image file names referenced in the content and add them to image_files.

    Parameters:
        content: an iterable of strings (typically a readlines() output); a single
            string is also accepted and is scanned as a whole.
        image_files: the set that receives the detected names; it is updated in place.

    Only the base name of each image target is recorded, any directory part is dropped.
    Targets without a file name (for example ones ending in "/") are ignored.

    Does NOT control for false positives such as commented out content,
    because "false negatives" are worse.
    """
    # Iterating a bare string would walk it character by character and never
    # match anything, so treat it as a single chunk of content instead.
    if isinstance(content, str):
        content = (content,)

    for content_str in content:
        for image_target in _IMAGE_MACRO_RE.findall(content_str):
            image_name = os.path.basename(image_target)
            # basename() is empty for targets such as "images/"; an empty name
            # cannot refer to an image file, so do not record it.
            if image_name:
                image_files.add(image_name)
637+
638+
def scrub_file(info, book_src_dir, src_file, image_files, tag=None, cwd=None):
617639
"""
618640
Scrubs a file and returns the cleaned file contents.
619641
"""
@@ -657,6 +679,9 @@ def scrub_file(info, book_src_dir, src_file, tag=None, cwd=None):
657679
with open(src_file, "r") as f:
658680
src_file_content = f.readlines()
659681

682+
# detect image references in the content
683+
detect_images(src_file_content, image_files)
684+
660685
# Scrub the content
661686
content = ""
662687
header_found = content_found = False

0 commit comments

Comments
 (0)