Commit a1dbfbf

Revert ASpace XLSX and bump version to 1.3.0
1 parent: 7200fa9

9 files changed (+14, -247 lines)

README.md (6 additions, 8 deletions)

@@ -2,13 +2,13 @@

Analyze disk images and/or create ready-to-ingest SIPs from a directory of disk images and related files.

-Version: 1.2.0
+Version: 1.3.0

## Breaking Changes

-Starting in v1.2.0, diskimageprocessor.py and the Processing mode of the GUI populate an ArchivesSpace description import XLSX instead of the previous ISAD-based CSV.
+In v1.2.0, diskimageprocessor.py and the Processing mode of the GUI were changed to populate an ArchivesSpace description import XLSX instead of the previous ISAD-based CSV.

-To have Disk Image Processor create the original ISAD-based description CSV instead, use the `-c` or `--csv` option (GUI support not yet added - use version 1.1.0 or before from the Releases tab for a GUI that writes the description CSV instead).
+In v1.3.0+, this change is reverted and the option to create an ArchivesSpace description XLSX removed.

## Usage

@@ -54,18 +54,16 @@ For HFS file systems, files are exported from the disk image using CLI version o

For UDF file systems, files are copied from the mounted disk image and `walk_to_dfxml.py` is used to generate DFXML.

-When complete, a description spreadsheet will be created containings ome pre-populated archival description.
-
-From v1.2.0, Disk Image Processor will write this information into an ArchivesSpace description XLSX spreadsheet.
-
-In previous versions or if the `"-c"/"--csv"` option is passed in v1.2.0+, a description.csv file will be created instead, containing the following columns:
+Disk Image Processor will create a description.csv file containing the following columns:

* Date statement
* Date begin
* Date end
* Extent
* Scope and content (containing information about the tool used to carve logical files and the most common file formats)

+(*Note in Disk Image Processor 1.2.0, this CSV file was replaced by an ArchivesSpace Excel spreadsheet by default. This change has been reverted in 1.3.0.*)
+
The destination directory also contains a log file and a "SIPs" directory containing a SIP created from each input disk image.

Each SIP directory contains a metadata/checksum.md5 manifest by default, but may optionally be bagged instead.
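
For orientation, the restored description.csv described in the README hunk above is plain CSV and can be read with Python's standard csv module. A minimal sketch, assuming the header row matches the column names listed above and using a placeholder path:

    import csv

    # Placeholder path to a Disk Image Processor destination directory.
    with open("destination/description.csv", newline="") as f:
        for row in csv.DictReader(f):
            # Column names follow the README's list; the exact header text is an assumption.
            print(row.get("Date statement"), row.get("Extent"), row.get("Scope and content"))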

aspace_template/aspace_import_template.xlsx (binary file, -29.1 KB, not shown)

diskimageprocessor.py (4 additions, 186 deletions)

@@ -29,10 +29,8 @@
import datetime
import itertools
import logging
-import openpyxl
import os
import shutil
-import stat
import subprocess
import sys
import time

@@ -258,174 +256,6 @@ def create_spreadsheet(args, sips, volumes, logger):
    logger.info("Description CSV created.")


-def create_aspace_excel_sheet(args, sips, volumes, logger):
-    """Create new copy of ASpace XLSX and append rows describing disk images."""
-    xlsx_path = os.path.abspath(os.path.join(args.destination, "description.xlsx"))
-    template_path = os.path.abspath(
-        os.path.join(THIS_DIR, "aspace_template", "aspace_import_template.xlsx")
-    )
-
-    try:
-        shutil.copyfile(template_path, xlsx_path)
-    except OSError as err:
-        logger.error(f"Unable to copy ASpace template to destination: {err}")
-
-    # Set ASpace file permissions
-    try:
-        os.chmod(
-            xlsx_path,
-            stat.S_IRUSR | stat.S_IWUSR | stat.S_IRGRP | stat.S_IWGRP | stat.S_IROTH,
-        )
-    except OSError as err:
-        logger.error(f"Error setting permissions: {err}")
-
-    workbook = openpyxl.load_workbook(filename=xlsx_path)
-    worksheet = workbook["Data"]
-
-    # TODO: Deduplicate with create_speadsheet
-    # Maybe create separate method that creates dict with info, and handle
-    # opening/writing csv or xlsx separately
-    for item in sorted(os.listdir(sips)):
-        sip_path = os.path.join(sips, item)
-
-        if not os.path.isdir(sip_path):
-            continue
-
-        disk_volumes = volumes[item]
-        number_volumes = len(disk_volumes)
-
-        date_earliest = ""
-        date_latest = ""
-
-        # Get and sum information from all DFXML files generated
-        dfxml_files = []
-        subdoc_dir = os.path.join(sip_path, "metadata", "submissionDocumentation")
-        if args.bagfiles:
-            subdoc_dir = os.path.join(
-                sip_path, "data", "metadata", "submissionDocumentation"
-            )
-        for root, _, files in os.walk(subdoc_dir):
-            for file in files:
-                if file.startswith("dfxml"):
-                    dfxml_files.append(os.path.join(root, file))
-
-        dfxml_files_info = []
-        for dfxml_file in dfxml_files:
-            dfxml_info = _parse_dfxml(dfxml_file, logger)
-            if not dfxml_info:
-                logger.warning(
-                    "No fileobjects in DFXML file {} - possibly file system fiwalk doesn't recognize".format(
-                        dfxml_file
-                    )
-                )
-                continue
-            dfxml_files_info.append(dfxml_info)
-
-        file_count = sum([dfxml_info["files"] for dfxml_info in dfxml_files_info])
-        total_bytes = sum([dfxml_info["bytes"] for dfxml_info in dfxml_files_info])
-        file_systems = [volume["file_system"] for volume in disk_volumes]
-        # Deduplicate list
-        file_systems = list(dict.fromkeys(file_systems))
-        file_systems_str = ", ".join(file_systems)
-
-        for dfxml_info in dfxml_files_info:
-            if not date_earliest or dfxml_info["date_earliest"] < date_earliest:
-                date_earliest = dfxml_info["date_earliest"]
-            if not date_latest or dfxml_info["date_latest"] > date_latest:
-                date_latest = dfxml_info["date_latest"]
-
-        # Create list with empty string for each of template's columns
-        row_to_write = []
-        for _ in range(173):
-            row_to_write.append("")
-
-        # Row indices for fields to write
-        INDEX_FILENAME = 6
-        INDEX_LEVEL_OF_DESCRIPTION = 8
-        INDEX_DATE_START = 23
-        INDEX_DATE_END = 24
-        INDEX_EXTENT_NUMBER = 34
-        INDEX_EXTENT_TYPE = 35
-        INDEX_SIZE = 36
-        INDEX_SCOPE_CONTENTS = 170
-
-        # Fields that are always constant
-        row_to_write[INDEX_FILENAME] = item
-        row_to_write[INDEX_LEVEL_OF_DESCRIPTION] = "File"
-
-        if file_count == 0:
-            row_to_write[
-                INDEX_SCOPE_CONTENTS
-            ] = "Error gathering statistics from SIP directory"
-
-            worksheet.append(row_to_write)
-
-            logger.error("Unable to read DFXML files for {}".format(sip_path))
-            continue
-
-        # Get file formats from Brunnhilde
-        file_formats = []
-        file_format_csv = os.path.join(
-            sip_path,
-            "metadata",
-            "submissionDocumentation",
-            "brunnhilde",
-            "csv_reports",
-            "formats.csv",
-        )
-        if args.bagfiles:
-            file_format_csv = os.path.join(
-                sip_path,
-                "data",
-                "metadata",
-                "submissionDocumentation",
-                "brunnhilde",
-                "csv_reports",
-                "formats.csv",
-            )
-
-        try:
-            with open(file_format_csv, "r") as f:
-                reader = csv.reader(f)
-                next(reader)
-                for row in itertools.islice(reader, 5):
-                    file_formats.append(row[0])
-        except:
-            file_formats.append(
-                "ERROR! No Brunnhilde formats.csv file to pull formats from."
-            )
-
-        file_formats = [element or "Unidentified" for element in file_formats]
-        file_formats_str = ", ".join(file_formats)
-
-        if number_volumes > 1:
-            scope_content = "Files exported from {} volumes with file systems: {}. File formats: {}".format(
-                number_volumes, file_systems_str, file_formats_str
-            )
-        else:
-            scope_content = (
-                "Files exported from {} file system volume. File formats: {}".format(
-                    disk_volumes[0]["file_system"], file_formats_str
-                )
-            )
-
-        row_to_write[INDEX_DATE_START] = str(date_earliest[:4])
-        row_to_write[INDEX_DATE_END] = str(date_latest[:4])
-        row_to_write[INDEX_EXTENT_NUMBER] = str(file_count)
-        row_to_write[INDEX_EXTENT_TYPE] = "digital files"
-        row_to_write[INDEX_SIZE] = str(human_readable_size(total_bytes))
-        row_to_write[INDEX_SCOPE_CONTENTS] = scope_content
-
-        worksheet.append(row_to_write)
-
-        logger.info("Described %s successfully." % (sip_path))
-
-    workbook.save(filename=xlsx_path)
-    workbook.close()
-
-    logger.info("ArchivesSpace description XLSX created.")
-
-
def _parse_dfxml(dfxml_path, logger, export_all=False):
    """Parse DFXML and return dict of information for spreadsheet."""
    volume_info = {
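
For readers skimming the removed create_aspace_excel_sheet() above, its core mechanics were: copy the bundled ASpace import template next to the output, open the copy with openpyxl, append one 173-column row per SIP to the "Data" worksheet, and save. A stripped-down sketch of just that openpyxl pattern, assuming description.xlsx is already a copy of the template; the populated values are illustrative, not the tool's interface:

    import openpyxl

    # Assumes description.xlsx is a copy of the ASpace import template, as the
    # removed function created with shutil.copyfile().
    workbook = openpyxl.load_workbook(filename="description.xlsx")
    worksheet = workbook["Data"]  # worksheet name used by the removed code

    row = [""] * 173          # one empty cell per template column
    row[6] = "disk-image-01"  # INDEX_FILENAME (illustrative value)
    row[8] = "File"           # INDEX_LEVEL_OF_DESCRIPTION
    worksheet.append(row)

    workbook.save(filename="description.xlsx")
    workbook.close()
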
@@ -593,12 +423,6 @@ def _make_parser():
        help="Export AppleDouble resource forks from HFS-formatted disks",
        action="store_true",
    )
-    parser.add_argument(
-        "-c",
-        "--csv",
-        help="Write description CSV (old default) instead of ArchivesSpace XLSX",
-        action="store_true",
-    )
    parser.add_argument("--quiet", action="store_true", help="Write only errors to log")
    parser.add_argument(
        "source", help="Source directory containing disk images (and related files)"

@@ -740,16 +564,10 @@ def main():
    )

    # write description
-    if args.csv:
-        try:
-            create_spreadsheet(args, sips, volumes, logger)
-        except Exception as err:
-            logger.error(f"Error creating description csv: {err}")
-    else:
-        try:
-            create_aspace_excel_sheet(args, sips, volumes, logger)
-        except Exception as err:
-            logger.error(f"Error creating ArchivesSpace description xlsx: {err}")
+    try:
+        create_spreadsheet(args, sips, volumes, logger)
+    except Exception as err:
+        logger.error(f"Error creating description csv: {err}")

    # print unprocessed list
    if unprocessed:
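
One practical consequence of the reverted option handling above: callers that still pass the removed `-c`/`--csv` flag will now fail at argument parsing instead of silently changing behavior. A minimal sketch of that argparse behavior, using a standalone parser rather than the tool's _make_parser():

    import argparse

    # Standalone parser for illustration; only the "source" positional mirrors
    # the real _make_parser(), everything else is omitted.
    parser = argparse.ArgumentParser()
    parser.add_argument("source")

    try:
        # "--csv" is no longer a defined argument, so argparse reports
        # "unrecognized arguments: --csv" and exits with status 2.
        parser.parse_args(["some_source_dir", "--csv"])
    except SystemExit as exc:
        print("argparse rejected --csv, exit status:", exc.code)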

install-bc2-ubuntu18.sh (0 additions, 5 deletions)

@@ -41,11 +41,6 @@ sudo cp LICENSE $dip_dir
sudo cp README.md $dip_dir
sudo cp -r disk_image_toolkit/ $dip_dir

-if [ ! -d $dip_dir/aspace_template ]; then
-    sudo mkdir $dip_dir/aspace_template
-fi
-sudo cp aspace_template/aspace_import_template.xlsx $dip_dir/aspace_template
-
if [ ! -d $dip_dir/disk_image_toolkit/dfxml ]; then
    sudo mkdir $dip_dir/disk_image_toolkit/dfxml/
fi

install.sh (0 additions, 5 deletions)

@@ -33,11 +33,6 @@ sudo cp LICENSE $dip_dir
sudo cp README.md $dip_dir
sudo cp -r disk_image_toolkit/ $dip_dir

-if [ ! -d $dip_dir/aspace_template ]; then
-    sudo mkdir $dip_dir/aspace_template
-fi
-sudo cp aspace_template/aspace_import_template.xlsx $dip_dir/aspace_template
-
if [ ! -d $dip_dir/disk_image_toolkit/dfxml ]; then
    sudo mkdir $dip_dir/disk_image_toolkit/dfxml/
fi

main.py (1 addition, 1 deletion)

@@ -45,7 +45,7 @@ def about_dialog(self):
        QMessageBox.information(
            self,
            "About",
-            "Disk Image Processor v1.2.0\nCanadian Centre for Architecture\nDeveloper: Tessa Walsh\n2018-2023\nMIT License\nhttps://github.com/CCA-Public/cca-diskimageprocessor",
+            "Disk Image Processor v1.3.0\nCanadian Centre for Architecture\nDeveloper: Tessa Walsh\n2018-2023\nMIT License\nhttps://github.com/CCA-Public/cca-diskimageprocessor",
        )

    def browse_analysis_source(self):

requirements/base.txt (0 additions, 1 deletion)

@@ -1,3 +1,2 @@
bagit>=1.7.0
brunnhilde>=1.9.1
-openpyxl>=3.1.2

test-install.sh (0 additions, 5 deletions)

@@ -35,11 +35,6 @@ sudo cp disk_image_toolkit/dfxml/dfxml.py /usr/share/ccatools/diskimageprocessor
sudo cp disk_image_toolkit/dfxml/objects.py /usr/share/ccatools/diskimageprocessor
sudo cp disk_image_toolkit/dfxml/walk_to_dfxml.py /usr/share/ccatools/diskimageprocessor

-if [ ! -d /usr/share/ccatools/diskimageprocessor/aspace_template ]; then
-    sudo mkdir /usr/share/ccatools/diskimageprocessor/aspace_template
-fi
-sudo cp aspace_template/aspace_import_template.xlsx /usr/share/ccatools/diskimageprocessor/aspace_template
-
sudo cp disk_image_toolkit/dfxml/dfxml.py .
sudo cp disk_image_toolkit/dfxml/objects.py .
sudo cp disk_image_toolkit/dfxml/walk_to_dfxml.py .
