@@ -455,32 +455,39 @@ def reformat_for_drupal(info):
455
455
456
456
ensure_directory (images_dir )
457
457
458
+ # ADDED 21 Jan 2025: selective processing of images
459
+ # the set of file names is to be stored in image_files
460
+ # The initial value includes images defined in attributes (to copy every time)
461
+ image_files = set ()
462
+
458
463
log .debug ("Copying source files for " + book ["Name" ])
459
- copy_files (book , book_src_dir , src_dir , dest_dir , info )
464
+ copy_files (book , book_src_dir , src_dir , dest_dir , info , image_files )
460
465
461
466
log .debug ("Copying images for " + book ["Name" ])
462
- copy_images (book , src_dir , images_dir , distro )
467
+ copy_images (book , src_dir , images_dir , distro , image_files )
463
468
464
469
465
470
466
def copy_images(node, src_path, dest_dir, distro, image_files):
    """
    Copy the referenced images over to the destination directory.

    REWORKED 21 Jan 2025: we now assume that there is a single images directory
    (``<src_path>/images``) and that all other images subdirectories are simply
    symlinks into it. So we do not iterate over the tree but simply copy the
    necessary files from that one images directory.

    :param node: not used by this implementation; kept so callers need not change
    :param src_path: directory that contains the ``images`` directory
    :param dest_dir: directory the image files are copied into (flat, no subdirs)
    :param distro: not used by this implementation; kept so callers need not change
    :param image_files: iterable of image file names to look up in the images directory
    """
    images_source_dir = os.path.join(src_path, "images")
    for image_file_name in image_files:
        image_file_pathname = os.path.join(images_source_dir, image_file_name)
        # Use isfile() rather than exists(): a detected name can resolve to a
        # directory (e.g. an empty name resolves to the images directory itself),
        # and shutil.copy() raises on directories.
        if os.path.isfile(image_file_pathname):
            shutil.copy(image_file_pathname, dest_dir)
        # if an image file is not found, this is not an error, because it might
        # have been picked up from a commented-out line. Actual missing images
        # should be caught by the asciidoctor/asciibinder part of CI
481
488
482
489
483
- def copy_files (node , book_src_dir , src_dir , dest_dir , info ):
490
+ def copy_files (node , book_src_dir , src_dir , dest_dir , info , image_files ):
484
491
"""
485
492
Recursively copy files from the source directory to the destination directory, making sure to scrub the content, add id's where the
486
493
content is referenced elsewhere and fix any links that should be cross references.
@@ -498,7 +505,7 @@ def topic_callback(topic_node, parent_dir, depth):
498
505
dest_file = os .path .join (node_dest_dir , topic_node ["File" ] + ".adoc" )
499
506
500
507
# Copy the file
501
- copy_file (info , book_src_dir , src_file , dest_dir , dest_file )
508
+ copy_file (info , book_src_dir , src_file , dest_dir , dest_file , image_files )
502
509
503
510
iter_tree (node , info ["distro" ], dir_callback , topic_callback )
504
511
@@ -509,6 +516,7 @@ def copy_file(
509
516
src_file ,
510
517
dest_dir ,
511
518
dest_file ,
519
+ image_files ,
512
520
include_check = True ,
513
521
tag = None ,
514
522
cwd = None ,
@@ -529,7 +537,7 @@ def copy_file(
529
537
# os.mknod(dest_file)
530
538
open (dest_file , "w" ).close ()
531
539
# Scrub/fix the content
532
- content = scrub_file (info , book_src_dir , src_file , tag = tag , cwd = cwd )
540
+ content = scrub_file (info , book_src_dir , src_file , image_files , tag = tag , cwd = cwd )
533
541
534
542
# Check for any includes
535
543
if include_check :
@@ -584,6 +592,7 @@ def copy_file(
584
592
include_file ,
585
593
dest_dir ,
586
594
dest_include_file ,
595
+ image_files ,
587
596
tag = include_tag ,
588
597
cwd = current_dir ,
589
598
)
@@ -612,8 +621,21 @@ def copy_file(
612
621
with open (dest_file , "w" ) as f :
613
622
f .write (content )
614
623
624
def detect_images(content, image_files):
    """
    Detect all image file names referenced in the content and add them to image_files.

    Matches the AsciiDoc block (``image::target[...]``) and inline
    (``image:target[...]``) image macros with a regular expression and records
    only the base name of each target.

    Does NOT control for false positives such as commented out content,
    because "false negatives" are worse.

    :param content: the lines to scan, e.g. a readlines() output; a single
        string is also accepted and is split into lines
    :param image_files: set that the detected file names are added to (mutated in place)
    """
    image_pattern = re.compile(r'image::?([^\s\[]+)\[.*?\]')

    # A bare string would otherwise be iterated character by character and
    # never match anything.
    if isinstance(content, str):
        content = content.splitlines()

    for content_str in content:
        for target in image_pattern.findall(content_str):
            image_file_name = os.path.basename(target)
            # A target ending in "/" has an empty base name; it names no file.
            if image_file_name:
                image_files.add(image_file_name)
637
+
638
+ def scrub_file (info , book_src_dir , src_file , image_files , tag = None , cwd = None ):
617
639
"""
618
640
Scrubs a file and returns the cleaned file contents.
619
641
"""
@@ -657,6 +679,9 @@ def scrub_file(info, book_src_dir, src_file, tag=None, cwd=None):
657
679
with open (src_file , "r" ) as f :
658
680
src_file_content = f .readlines ()
659
681
682
+ # detect image references in the content
683
+ detect_images (src_file_content , image_files )
684
+
660
685
# Scrub the content
661
686
content = ""
662
687
header_found = content_found = False
0 commit comments