@@ -29,10 +29,8 @@
 import datetime
 import itertools
 import logging
-import openpyxl
 import os
 import shutil
-import stat
 import subprocess
 import sys
 import time
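Both dropped imports were needed only by the XLSX writer deleted below: openpyxl wrote the workbook, and stat supplied the permission bits for the template copy. A minimal sketch of that chmod pattern, with a placeholder path (the flag combination below is equivalent to mode 0o664, i.e. rw-rw-r--):

    import os
    import stat

    # user/group read-write, others read-only (0o664)
    os.chmod(
        "description.xlsx",  # placeholder path
        stat.S_IRUSR | stat.S_IWUSR | stat.S_IRGRP | stat.S_IWGRP | stat.S_IROTH,
    )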
@@ -258,174 +256,6 @@ def create_spreadsheet(args, sips, volumes, logger):
     logger.info("Description CSV created.")
 
 
-def create_aspace_excel_sheet(args, sips, volumes, logger):
-    """Create new copy of ASpace XLSX and append rows describing disk images."""
-    xlsx_path = os.path.abspath(os.path.join(args.destination, "description.xlsx"))
-    template_path = os.path.abspath(
-        os.path.join(THIS_DIR, "aspace_template", "aspace_import_template.xlsx")
-    )
-
-    try:
-        shutil.copyfile(template_path, xlsx_path)
-    except OSError as err:
-        logger.error(f"Unable to copy ASpace template to destination: {err}")
-
-    # Set ASpace file permissions
-    try:
-        os.chmod(
-            xlsx_path,
-            stat.S_IRUSR | stat.S_IWUSR | stat.S_IRGRP | stat.S_IWGRP | stat.S_IROTH,
-        )
-    except OSError as err:
-        logger.error(f"Error setting permissions: {err}")
-
-    workbook = openpyxl.load_workbook(filename=xlsx_path)
-    worksheet = workbook["Data"]
-
-    # TODO: Deduplicate with create_spreadsheet
-    # Maybe create separate method that creates dict with info, and handle
-    # opening/writing csv or xlsx separately
-    for item in sorted(os.listdir(sips)):
-        sip_path = os.path.join(sips, item)
-
-        if not os.path.isdir(sip_path):
-            continue
-
-        disk_volumes = volumes[item]
-        number_volumes = len(disk_volumes)
-
-        date_earliest = ""
-        date_latest = ""
-
-        # Get and sum information from all DFXML files generated
-        dfxml_files = []
-        subdoc_dir = os.path.join(sip_path, "metadata", "submissionDocumentation")
-        if args.bagfiles:
-            subdoc_dir = os.path.join(
-                sip_path, "data", "metadata", "submissionDocumentation"
-            )
-        for root, _, files in os.walk(subdoc_dir):
-            for file in files:
-                if file.startswith("dfxml"):
-                    dfxml_files.append(os.path.join(root, file))
-
-        dfxml_files_info = []
-        for dfxml_file in dfxml_files:
-            dfxml_info = _parse_dfxml(dfxml_file, logger)
-            if not dfxml_info:
-                logger.warning(
-                    "No fileobjects in DFXML file {} - possibly file system fiwalk doesn't recognize".format(
-                        dfxml_file
-                    )
-                )
-                continue
-            dfxml_files_info.append(dfxml_info)
-
-        file_count = sum([dfxml_info["files"] for dfxml_info in dfxml_files_info])
-        total_bytes = sum([dfxml_info["bytes"] for dfxml_info in dfxml_files_info])
-        file_systems = [volume["file_system"] for volume in disk_volumes]
-        # Deduplicate list
-        file_systems = list(dict.fromkeys(file_systems))
-        file_systems_str = ", ".join(file_systems)
-
-        for dfxml_info in dfxml_files_info:
-            if not date_earliest or dfxml_info["date_earliest"] < date_earliest:
-                date_earliest = dfxml_info["date_earliest"]
-            if not date_latest or dfxml_info["date_latest"] > date_latest:
-                date_latest = dfxml_info["date_latest"]
-
-        # Create list with empty string for each of template's columns
-        row_to_write = []
-        for _ in range(173):
-            row_to_write.append("")
-
-        # Row indices for fields to write
-        INDEX_FILENAME = 6
-        INDEX_LEVEL_OF_DESCRIPTION = 8
-        INDEX_DATE_START = 23
-        INDEX_DATE_END = 24
-        INDEX_EXTENT_NUMBER = 34
-        INDEX_EXTENT_TYPE = 35
-        INDEX_SIZE = 36
-        INDEX_SCOPE_CONTENTS = 170
-
-        # Fields that are always constant
-        row_to_write[INDEX_FILENAME] = item
-        row_to_write[INDEX_LEVEL_OF_DESCRIPTION] = "File"
-
-        if file_count == 0:
-            row_to_write[
-                INDEX_SCOPE_CONTENTS
-            ] = "Error gathering statistics from SIP directory"
-
-            worksheet.append(row_to_write)
-
-            logger.error("Unable to read DFXML files for {}".format(sip_path))
-            continue
-
-        # Get file formats from Brunnhilde
-        file_formats = []
-        file_format_csv = os.path.join(
-            sip_path,
-            "metadata",
-            "submissionDocumentation",
-            "brunnhilde",
-            "csv_reports",
-            "formats.csv",
-        )
-        if args.bagfiles:
-            file_format_csv = os.path.join(
-                sip_path,
-                "data",
-                "metadata",
-                "submissionDocumentation",
-                "brunnhilde",
-                "csv_reports",
-                "formats.csv",
-            )
-
-        try:
-            with open(file_format_csv, "r") as f:
-                reader = csv.reader(f)
-                next(reader)
-                for row in itertools.islice(reader, 5):
-                    file_formats.append(row[0])
-        except:
-            file_formats.append(
-                "ERROR! No Brunnhilde formats.csv file to pull formats from."
-            )
-
-        file_formats = [element or "Unidentified" for element in file_formats]
-        file_formats_str = ", ".join(file_formats)
-
-        if number_volumes > 1:
-            scope_content = "Files exported from {} volumes with file systems: {}. File formats: {}".format(
-                number_volumes, file_systems_str, file_formats_str
-            )
-        else:
-            scope_content = (
-                "Files exported from {} file system volume. File formats: {}".format(
-                    disk_volumes[0]["file_system"], file_formats_str
-                )
-            )
-
-        row_to_write[INDEX_DATE_START] = str(date_earliest[:4])
-        row_to_write[INDEX_DATE_END] = str(date_latest[:4])
-        row_to_write[INDEX_EXTENT_NUMBER] = str(file_count)
-        row_to_write[INDEX_EXTENT_TYPE] = "digital files"
-        row_to_write[INDEX_SIZE] = str(human_readable_size(total_bytes))
-        row_to_write[INDEX_SCOPE_CONTENTS] = scope_content
-
-        worksheet.append(row_to_write)
-
-        logger.info("Described %s successfully." % (sip_path))
-
-    workbook.save(filename=xlsx_path)
-    workbook.close()
-
-    logger.info("ArchivesSpace description XLSX created.")
-
-
 def _parse_dfxml(dfxml_path, logger, export_all=False):
     """Parse DFXML and return dict of information for spreadsheet."""
     volume_info = {
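For readers skimming the removal above: the deleted function's workbook handling reduced to a small openpyxl pattern, loading a copy of the template, appending one fixed-width row per SIP, and saving. A minimal sketch, assuming "description.xlsx" already exists as a copy of the 173-column ASpace template with a "Data" sheet; "disk-001" is a hypothetical SIP name:

    import openpyxl

    # Open the copied template and select its data sheet.
    workbook = openpyxl.load_workbook(filename="description.xlsx")
    worksheet = workbook["Data"]

    # Build one row with an empty cell per template column,
    # fill only the columns being described, and append it.
    row = [""] * 173
    row[6] = "disk-001"  # filename column (hypothetical value)
    row[8] = "File"      # level of description
    worksheet.append(row)

    workbook.save(filename="description.xlsx")
    workbook.close()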
@@ -593,12 +423,6 @@ def _make_parser():
         help="Export AppleDouble resource forks from HFS-formatted disks",
         action="store_true",
     )
-    parser.add_argument(
-        "-c",
-        "--csv",
-        help="Write description CSV (old default) instead of ArchivesSpace XLSX",
-        action="store_true",
-    )
     parser.add_argument("--quiet", action="store_true", help="Write only errors to log")
     parser.add_argument(
         "source", help="Source directory containing disk images (and related files)"
@@ -740,16 +564,10 @@ def main():
     )
 
     # write description
-    if args.csv:
-        try:
-            create_spreadsheet(args, sips, volumes, logger)
-        except Exception as err:
-            logger.error(f"Error creating description csv: {err}")
-    else:
-        try:
-            create_aspace_excel_sheet(args, sips, volumes, logger)
-        except Exception as err:
-            logger.error(f"Error creating ArchivesSpace description xlsx: {err}")
+    try:
+        create_spreadsheet(args, sips, volumes, logger)
+    except Exception as err:
+        logger.error(f"Error creating description csv: {err}")
 
     # print unprocessed list
     if unprocessed: