diff --git a/Pipfile b/Pipfile
index f498f383c..abf710389 100644
--- a/Pipfile
+++ b/Pipfile
@@ -63,6 +63,7 @@ xmlsec = "<1.3.14"
 kcworks = {file = "site", editable = true}
 flask-iiif = "*"
 sentry-sdk = "*"
+pydantic = "*"
 
 [requires]
 python_version = "3.9"
diff --git a/Pipfile.lock b/Pipfile.lock
index d69257138..b7d9545f7 100644
--- a/Pipfile.lock
+++ b/Pipfile.lock
@@ -1,7 +1,7 @@
 {
     "_meta": {
         "hash": {
-            "sha256": "6f4a6f66fc846912f8bac226f2deff7633cae1a41e5a27c843f025c85c269b2a"
+            "sha256": "19b3aa2106ee0e1f8da0e521ffba267e03cc75a35557cd63e1a8c3bdd4117998"
         },
         "pipfile-spec": 6,
         "requires": {
@@ -146,6 +146,14 @@
             ],
             "version": "==10.0.0"
         },
+        "annotated-types": {
+            "hashes": [
+                "sha256:1f02e8b43a8fbbc3f3e0d4f0f4bfc8131bcb4eebe8849b8e5c773f3a1c582a53",
+                "sha256:aff07c09a53a08bc8cfccb9c85b05f1aa9a2a6f23728d790723543408344ce89"
+            ],
+            "markers": "python_version >= '3.8'",
+            "version": "==0.7.0"
+        },
         "appdirs": {
             "hashes": [
                 "sha256:7d5d0167b2b1ba821647616af46a749d1c653740dd0d2415100fe26e27afdf41",
@@ -3202,6 +3210,121 @@
             "markers": "python_version >= '3.8'",
             "version": "==2.22"
         },
+        "pydantic": {
+            "hashes": [
+                "sha256:427d664bf0b8a2b34ff5dd0f5a18df00591adcee7198fbd71981054cef37b584",
+                "sha256:ca5daa827cce33de7a42be142548b0096bf05a7e7b365aebfa5f8eeec7128236"
+            ],
+            "index": "pypi",
+            "markers": "python_version >= '3.8'",
+            "version": "==2.10.6"
+        },
+        "pydantic-core": {
+            "hashes": [
+                "sha256:00bad2484fa6bda1e216e7345a798bd37c68fb2d97558edd584942aa41b7d278",
+                "sha256:0296abcb83a797db256b773f45773da397da75a08f5fcaef41f2044adec05f50",
+                "sha256:03d0f86ea3184a12f41a2d23f7ccb79cdb5a18e06993f8a45baa8dfec746f0e9",
+                "sha256:044a50963a614ecfae59bb1eaf7ea7efc4bc62f49ed594e18fa1e5d953c40e9f",
+                "sha256:05e3a55d124407fffba0dd6b0c0cd056d10e983ceb4e5dbd10dda135c31071d6",
+                "sha256:08e125dbdc505fa69ca7d9c499639ab6407cfa909214d500897d02afb816e7cc",
+                "sha256:097830ed52fd9e427942ff3b9bc17fab52913b2f50f2880dc4a5611446606a54",
+                "sha256:0d1e85068e818c73e048fe28cfc769040bb1f475524f4745a5dc621f75ac7630",
+                "sha256:0d75070718e369e452075a6017fbf187f788e17ed67a3abd47fa934d001863d9",
+                "sha256:14d4a5c49d2f009d62a2a7140d3064f686d17a5d1a268bc641954ba181880236",
+                "sha256:172fce187655fece0c90d90a678424b013f8fbb0ca8b036ac266749c09438cb7",
+                "sha256:18a101c168e4e092ab40dbc2503bdc0f62010e95d292b27827871dc85450d7ee",
+                "sha256:1a4207639fb02ec2dbb76227d7c751a20b1a6b4bc52850568e52260cae64ca3b",
+                "sha256:1c1fd185014191700554795c99b347d64f2bb637966c4cfc16998a0ca700d048",
+                "sha256:1e2cb691ed9834cd6a8be61228471d0a503731abfb42f82458ff27be7b2186fc",
+                "sha256:1ebaf1d0481914d004a573394f4be3a7616334be70261007e47c2a6fe7e50130",
+                "sha256:220f892729375e2d736b97d0e51466252ad84c51857d4d15f5e9692f9ef12be4",
+                "sha256:251136cdad0cb722e93732cb45ca5299fb56e1344a833640bf93b2803f8d1bfd",
+                "sha256:26f0d68d4b235a2bae0c3fc585c585b4ecc51382db0e3ba402a22cbc440915e4",
+                "sha256:26f32e0adf166a84d0cb63be85c562ca8a6fa8de28e5f0d92250c6b7e9e2aff7",
+                "sha256:280d219beebb0752699480fe8f1dc61ab6615c2046d76b7ab7ee38858de0a4e7",
+                "sha256:28ccb213807e037460326424ceb8b5245acb88f32f3d2777427476e1b32c48c4",
+                "sha256:2bf14caea37e91198329b828eae1618c068dfb8ef17bb33287a7ad4b61ac314e",
+                "sha256:2d367ca20b2f14095a8f4fa1210f5a7b78b8a20009ecced6b12818f455b1e9fa",
+                "sha256:30c5f68ded0c36466acede341551106821043e9afaad516adfb6e8fa80a4e6a6",
+                "sha256:337b443af21d488716f8d0b6164de833e788aa6bd7e3a39c005febc1284f4962",
+                "sha256:3911ac9284cd8a1792d3cb26a2da18f3ca26c6908cc434a18f730dc0db7bfa3b",
+                "sha256:3d591580c34f4d731592f0e9fe40f9cc1b430d297eecc70b962e93c5c668f15f",
+                "sha256:3de3ce3c9ddc8bbd88f6e0e304dea0e66d843ec9de1b0042b0911c1663ffd474",
+                "sha256:3de9961f2a346257caf0aa508a4da705467f53778e9ef6fe744c038119737ef5",
+                "sha256:40d02e7d45c9f8af700f3452f329ead92da4c5f4317ca9b896de7ce7199ea459",
+                "sha256:42c5f762659e47fdb7b16956c71598292f60a03aa92f8b6351504359dbdba6cf",
+                "sha256:47956ae78b6422cbd46f772f1746799cbb862de838fd8d1fbd34a82e05b0983a",
+                "sha256:491a2b73db93fab69731eaee494f320faa4e093dbed776be1a829c2eb222c34c",
+                "sha256:4c9775e339e42e79ec99c441d9730fccf07414af63eac2f0e48e08fd38a64d76",
+                "sha256:4e0b4220ba5b40d727c7f879eac379b822eee5d8fff418e9d3381ee45b3b0362",
+                "sha256:50a68f3e3819077be2c98110c1f9dcb3817e93f267ba80a2c05bb4f8799e2ff4",
+                "sha256:519f29f5213271eeeeb3093f662ba2fd512b91c5f188f3bb7b27bc5973816934",
+                "sha256:521eb9b7f036c9b6187f0b47318ab0d7ca14bd87f776240b90b21c1f4f149320",
+                "sha256:57762139821c31847cfb2df63c12f725788bd9f04bc2fb392790959b8f70f118",
+                "sha256:5e4f4bb20d75e9325cc9696c6802657b58bc1dbbe3022f32cc2b2b632c3fbb96",
+                "sha256:5e68c4446fe0810e959cdff46ab0a41ce2f2c86d227d96dc3847af0ba7def306",
+                "sha256:669e193c1c576a58f132e3158f9dfa9662969edb1a250c54d8fa52590045f046",
+                "sha256:688d3fd9fcb71f41c4c015c023d12a79d1c4c0732ec9eb35d96e3388a120dcf3",
+                "sha256:6fb4aadc0b9a0c063206846d603b92030eb6f03069151a625667f982887153e2",
+                "sha256:7041c36f5680c6e0f08d922aed302e98b3745d97fe1589db0a3eebf6624523af",
+                "sha256:71b24c7d61131bb83df10cc7e687433609963a944ccf45190cfc21e0887b08c9",
+                "sha256:77d1bca19b0f7021b3a982e6f903dcd5b2b06076def36a652e3907f596e29f67",
+                "sha256:7969e133a6f183be60e9f6f56bfae753585680f3b7307a8e555a948d443cc05a",
+                "sha256:7a66efda2387de898c8f38c0cf7f14fca0b51a8ef0b24bfea5849f1b3c95af27",
+                "sha256:7d0c8399fcc1848491f00e0314bd59fb34a9c008761bcb422a057670c3f65e35",
+                "sha256:7d14bd329640e63852364c306f4d23eb744e0f8193148d4044dd3dacdaacbd8b",
+                "sha256:7e17b560be3c98a8e3aa66ce828bdebb9e9ac6ad5466fba92eb74c4c95cb1151",
+                "sha256:8083d4e875ebe0b864ffef72a4304827015cff328a1be6e22cc850753bfb122b",
+                "sha256:82f91663004eb8ed30ff478d77c4d1179b3563df6cdb15c0817cd1cdaf34d154",
+                "sha256:82f986faf4e644ffc189a7f1aafc86e46ef70372bb153e7001e8afccc6e54133",
+                "sha256:83097677b8e3bd7eaa6775720ec8e0405f1575015a463285a92bfdfe254529ef",
+                "sha256:85210c4d99a0114f5a9481b44560d7d1e35e32cc5634c656bc48e590b669b145",
+                "sha256:8c19d1ea0673cd13cc2f872f6c9ab42acc4e4f492a7ca9d3795ce2b112dd7e15",
+                "sha256:8d9b3388db186ba0c099a6d20f0604a44eabdeef1777ddd94786cdae158729e4",
+                "sha256:8e10c99ef58cfdf2a66fc15d66b16c4a04f62bca39db589ae8cba08bc55331bc",
+                "sha256:953101387ecf2f5652883208769a79e48db18c6df442568a0b5ccd8c2723abee",
+                "sha256:9c3ed807c7b91de05e63930188f19e921d1fe90de6b4f5cd43ee7fcc3525cb8c",
+                "sha256:9e0c8cfefa0ef83b4da9588448b6d8d2a2bf1a53c3f1ae5fca39eb3061e2f0b0",
+                "sha256:9fdbe7629b996647b99c01b37f11170a57ae675375b14b8c13b8518b8320ced5",
+                "sha256:a0fcd29cd6b4e74fe8ddd2c90330fd8edf2e30cb52acda47f06dd615ae72da57",
+                "sha256:ac4dbfd1691affb8f48c2c13241a2e3b60ff23247cbcf981759c768b6633cf8b",
+                "sha256:b0cb791f5b45307caae8810c2023a184c74605ec3bcbb67d13846c28ff731ff8",
+                "sha256:ba5dd002f88b78a4215ed2f8ddbdf85e8513382820ba15ad5ad8955ce0ca19a1",
+                "sha256:bca101c00bff0adb45a833f8451b9105d9df18accb8743b08107d7ada14bd7da",
+                "sha256:bd8086fa684c4775c27f03f062cbb9eaa6e17f064307e86b21b9e0abc9c0f02e",
+                "sha256:bec317a27290e2537f922639cafd54990551725fc844249e64c523301d0822fc",
+                "sha256:c10eb4f1659290b523af58fa7cffb452a61ad6ae5613404519aee4bfbf1df993",
+                "sha256:c33939a82924da9ed65dab5a65d427205a73181d8098e79b6b426bdf8ad4e656",
+                "sha256:c61709a844acc6bf0b7dce7daae75195a10aac96a596ea1b776996414791ede4",
+                "sha256:c70c26d2c99f78b125a3459f8afe1aed4d9687c24fd677c6a4436bc042e50d6c",
+                "sha256:c817e2b40aba42bac6f457498dacabc568c3b7a986fc9ba7c8d9d260b71485fb",
+                "sha256:cabb9bcb7e0d97f74df8646f34fc76fbf793b7f6dc2438517d7a9e50eee4f14d",
+                "sha256:cc3f1a99a4f4f9dd1de4fe0312c114e740b5ddead65bb4102884b384c15d8bc9",
+                "sha256:cca63613e90d001b9f2f9a9ceb276c308bfa2a43fafb75c8031c4f66039e8c6e",
+                "sha256:ce8918cbebc8da707ba805b7fd0b382816858728ae7fe19a942080c24e5b7cd1",
+                "sha256:d2088237af596f0a524d3afc39ab3b036e8adb054ee57cbb1dcf8e09da5b29cc",
+                "sha256:d262606bf386a5ba0b0af3b97f37c83d7011439e3dc1a9298f21efb292e42f1a",
+                "sha256:d2d63f1215638d28221f664596b1ccb3944f6e25dd18cd3b86b0a4c408d5ebb9",
+                "sha256:d3e8d504bdd3f10835468f29008d72fc8359d95c9c415ce6e767203db6127506",
+                "sha256:d4041c0b966a84b4ae7a09832eb691a35aec90910cd2dbe7a208de59be77965b",
+                "sha256:d716e2e30c6f140d7560ef1538953a5cd1a87264c737643d481f2779fc247fe1",
+                "sha256:d81d2068e1c1228a565af076598f9e7451712700b673de8f502f0334f281387d",
+                "sha256:d9640b0059ff4f14d1f37321b94061c6db164fbe49b334b31643e0528d100d99",
+                "sha256:de3cd1899e2c279b140adde9357c4495ed9d47131b4a4eaff9052f23398076b3",
+                "sha256:e0fd26b16394ead34a424eecf8a31a1f5137094cabe84a1bcb10fa6ba39d3d31",
+                "sha256:e2bb4d3e5873c37bb3dd58714d4cd0b0e6238cebc4177ac8fe878f8b3aa8e74c",
+                "sha256:eb026e5a4c1fee05726072337ff51d1efb6f59090b7da90d30ea58625b1ffb39",
+                "sha256:eda3f5c2a021bbc5d976107bb302e0131351c2ba54343f8a496dc8783d3d3a6a",
+                "sha256:ef592d4bad47296fb11f96cd7dc898b92e795032b4894dfb4076cfccd43a9308",
+                "sha256:f141ee28a0ad2123b6611b6ceff018039df17f32ada8b534e6aa039545a3efb2",
+                "sha256:f66d89ba397d92f840f8654756196d93804278457b5fbede59598a1f9f90b228",
+                "sha256:f6f8e111843bbb0dee4cb6594cdc73e79b3329b526037ec242a3e49012495b3b",
+                "sha256:fa8e459d4954f608fa26116118bb67f56b93b209c39b008277ace29937453dc9",
+                "sha256:fd1aea04935a508f62e0d0ef1f5ae968774a32afc306fb8545e06f5ff5cdf3ad"
+            ],
+            "markers": "python_version >= '3.8'",
+            "version": "==2.27.2"
+        },
         "pydash": {
             "hashes": [
                 "sha256:35caa588e01d293713655e0870544d25128cd414c5e19477a0d63adc2b2ca03e",
diff --git a/docs/source/api.md b/docs/source/api.md
index 04abde259..a9d85d65d 100644
--- a/docs/source/api.md
+++ b/docs/source/api.md
@@ -103,8 +103,12 @@ Why is this API needed? The InvenioRDM REST API can be fragile and difficult to
 
 ### Who can use the import API?
 
+The import API is available to authorized organizations who have obtained an OAuth token for API operations.
 The import API is available to authorized organizations who have obtained an OAuth token for API operations.
 
+The import API places the works directly in a collection, without passing through the review process. So, the user to whom the token is issued must have sufficient permissions to publish directly in the collection. The exact role required depends on the collection's review policy:
+- *If the review policy allows managers and curators to skip the review process*, the user of the import API must have one of the roles "manager," "curator," or "owner" in the collection.
+- *If the review policy requires all submissions to be reviewed*, the user of the import API must have the "owner" role in the collection.
 The import API places the works directly in a collection, without passing through the review process. So, the user to whom the token is issued must have sufficient permissions to publish directly in the collection. The exact role required depends on the collection's review policy:
 - *If the review policy allows managers and curators to skip the review process*, the user of the import API must have one of the roles "manager," "curator," or "owner" in the collection.
 - *If the review policy requires all submissions to be reviewed*, the user of the import API must have the "owner" role in the collection.
@@ -114,6 +118,7 @@ The import API places the works directly in a collection, without passing throug
 #### Request
 ```
 POST https://works.hcommons.org/api/import/<my-collection-id> HTTP/1.1
+POST https://works.hcommons.org/api/import/<my-collection-id> HTTP/1.1
 ```
 
 #### Required headers
@@ -132,6 +137,15 @@ Only one URL path parameter is required:
 | `collection` | no | `string` | The ID (either the url slug or the UUID) of the collection to which the work should be published. If this value is provided, the work will be submitted to the collection immediately after import. If the collection requires review, and the `review_required` parameter is set to "true", the work will be placed in the collection's review queue. |
 
 
+#### Request url path parameters
+
+Only one URL path parameter is required:
+
+| Name | Required | Type | Description |
+|------|----------|------|-------------|
+| `collection` | no | `string` | The ID (either the url slug or the UUID) of the collection to which the work should be published. If this value is provided, the work will be submitted to the collection immediately after import. If the collection requires review, and the `review_required` parameter is set to "true", the work will be placed in the collection's review queue. |
+
+
 #### Request body
 
 This request must be made with a multipart/form-data request. The request body must include parts with following names:
@@ -139,13 +153,16 @@ This request must be made with a multipart/form-data request. The request body m
 | Name | Required | Content Type | Description |
 |-------|----------|--------------|-------------|
 | `files` | yes | `application/octet-stream` | The (binary) file content to be uploaded. If multiple files are being uploaded, a body part with this same name ("files") must be provided for each file. If more than three or four files are being uploaded, it is recommended to provide a single zip archive containing all of the files. The files will be assigned to the appropriate work based on filename, so where multiple files are provided these **must be unique**. If a zip archive is provided, the files must be contained in a single compressed folder with no subfolders. |
-| `metadata` | yes | `application/json` | An array of JSON metadata objects, each of which will be used to create a new work. Each must following the KCWorks implementation of the InvenioRDM metadata schema described {ref}`here <metadata:metadata-schema-vocabularies-and-identifiers>`. In addition, an array of owners for the work may optionally be provided by adding an `access.owned_by` property to each metadata object. Note that if no owners are provided, the work will be created with the organizational account that issued the OAuth token as the owner. |
+| `metadata` | yes | `application/json` | An array of JSON metadata objects, each of which will be used to create a new work. Each must follow the KCWorks implementation of the InvenioRDM metadata schema described {ref}`here <metadata:metadata-schema-vocabularies-and-identifiers>`. In addition, an array of owners for the work may optionally be provided by adding a `parent.access.owned_by` property to each metadata object. Note that if no owners are provided, the work will be created with the organizational account that issued the OAuth token as the owner. |
 | `review_required` | no | `text/plain` | A string representation of a boolean (either "true" or "false") indicating whether the work should be reviewed before publication. This setting is only relevant if the work is intended for publication in a collection that requires review. It will override the collection's usual review policy, since the work is being uploaded by a collection administrator. (Default: "true") |
 | `strict_validation` | no | `text/plain` | A string representation of a boolean (either "true" or "false") indicating whether the import request should be rejected if any validation errors are encountered. If this value is "false", the imported work will be created in KCWorks even if some of the provided metadata does not conform to the KCWorks metadata schema, provided these are not required fields. If this value is "true", the import request will be rejected if any validation errors are encountered. (Default: "true") |
 | `all_or_none` | no | `text/plain` | A string representation of a boolean (either "true" or "false") indicating whether the entire import request should be rejected if any of the works fail to be created (whether for validation errors, upload errors, or other reasons). If this value is "false", the import request will be accepted even if some of the works cannot be created. The response in this case will include a list of works that were successfully created and a list of errors for the works that failed to be created. (Default: "true") |
 
 #### Identifying the owners of the work
 
+The array of owners, if provided in a metadata object's `parent.access.owned_by` property, must include at least the full name and email address of the users to be added as owners of the work. If the user already has a Knowledge Commons account, their username should also be provided. Additional identifiers (e.g., ORCID) may be provided as well to help avoid duplicate accounts, since a KCWorks account will be created for each user if they do not already have one.
+#### Identifying the owners of the work
+
 The array of owners, if provided in a metadata object's `parent.access.owned_by` property, must include at least the full name and email address of the users to be added as owners of the work. If the user already has a Knowledge Commons account, their username should also be provided. Additional identifiers (e.g., ORCID) may be provided as well to help avoid duplicate accounts, since a KCWorks account will be created for each user if they do not already have one.
 
 | key | required | type | description |
@@ -182,6 +199,16 @@ Note that it is *not* assumed that the creators of a work should be the work's o
 
 > Note, too, that only the first member of the owners array will technically be assigned as the work's owner in KCWorks. The other owners will be assigned access grants to the work with "manage" permissions.
 
+#### KC accounts for work owners
+
+KCWorks will create an internal KCWorks account for each work owner who does not already have an account on Knowledge Commons. Note that this *does not* create a full Knowledge Commons account. The owner will still need to visit Knowledge Commons to create an account through the usual registration process. When they do so, their KCWorks account will be linked to their Knowledge Commons account and they will be able to manage and edit their uploaded works.
+
+> It is vital that the owner provide an identifier when they create their Knowledge Commons account that matches an identifier provided for them in the `owned_by` property of the work's metadata object. This allows KCWorks to link the owner's KCWorks account to their Knowledge Commons account after they register. The connecting identifier may be either:
+> - the same primary email address
+> - the same ORCID identifier
+
+If an owner does not already belong to the collection to which the records are being imported, that owner will also be added to the collection's membership with the "reader" role. This allows them access to any records restricted to the collection's membership, but does not afford them any additional permissions. What it does mean is that collection managers will be able to see all of the work owners in the list of collection members on the collection's landing page.
+
 #### Identifying the work for import
 
 It is crucial that each work to be imported is assigned a unique identifier. This may be an identifier used internally by the importing organization, it may be a universally unique string such as a UUID, or it may be a universal identifier such as a DOI or a handle. In either case it must be unique across all works to be imported for the collection. This identifier will be used to identify the work in the response, and will be used to identify the work when checking for duplicate imports.
diff --git a/docs/source/changelog.md b/docs/source/changelog.md
index 6f876f44d..e0b3e9675 100644
--- a/docs/source/changelog.md
+++ b/docs/source/changelog.md
@@ -4,6 +4,34 @@
 
 # Changes
 
+## 0.4.0-beta9 (2025-02-25)
+
+- Importer
+    - Added a new streamlined importer API.
+- Remote user data service
+    - Fixed bug where user profile data was not being updated because comparison with initial data was not being made correctly.
+    - Improved handling of timeout errors when fetching data from the remote source.
+- Documentation
+    - Added documentation for the new importer API.
+    - Improved documentation for other API endpoints and metadata fields.
+    - Added documentation of InvenioRDM service architecture for developers.
+- Email notifications
+    - Improved formatting of moderation email notifications for first uploads/publications.
+- Testing
+    - Extensive improvements to the test suite, including new tests for the new importer API and remote user data service.
+    - Added workflow to run tests on GitHub.
+- User data sync
+    - Fixed several bugs in the user data sync process.
+    - Added CLI commands to fetch KCWorks user and group data.
+- Search provisioning
+    - Fixed bugs in search provisioning and implemented new tests.
+- Export menu
+    - Fixed a bug preventing the export menu from working on the detail page.
+- Large uploads
+    - Raised max content length for large uploads.
+- Account linking
+    - Existing KCWorks accounts can now be linked to KC accounts on login based on email address, ORCID iD, or KC username.
+
 ## 0.3.5-beta8 (2025-01-10)
 
 - Dashboard works search
diff --git a/invenio.cfg b/invenio.cfg
index 1a738d7d9..01dfeb5c1 100644
--- a/invenio.cfg
+++ b/invenio.cfg
@@ -55,7 +55,6 @@ from invenio_rdm_records.services.stats import (
 )
 from invenio_rdm_records.services import facets
 from invenio_records_resources.services.custom_fields import TextCF
-from invenio_saml.handlers import acs_handler_factory
 from invenio_stats.contrib.event_builders import build_file_unique_id
 from invenio_stats.processors import EventsIndexer, anonymize_user, flag_robots
 from invenio_stats.queries import TermsQuery
@@ -127,8 +126,10 @@ from kcworks.metadata_fields.kcr_series_field import (
     KCR_SERIES_FIELDS_UI,
 )
 from kcworks.services.accounts.saml import (
+    acs_handler_factory,
     knowledgeCommons_account_setup,
     knowledgeCommons_account_info,
+    knowledgeCommons_account_get_user,
 )
 from kcworks.services.notifications.builders import (
     CustomCommunityInvitationAcceptNotificationBuilder,
@@ -1250,6 +1251,16 @@ RDM_RECORDS_IDENTIFIERS_SCHEMES.update(
             "validator": idutils.is_doi,
             "datacite": "Other",
         },
+        "import-recid": {
+            "label": _("Import record ID"),
+            "validator": always_valid,
+            "datacite": "Other",
+        },
+        "neh-recid": {
+            "label": _("NEH record ID"),
+            "validator": always_valid,
+            "datacite": "Other",
+        },
     }
 )
 
@@ -1275,6 +1286,16 @@ RDM_RECORDS_PERSONORG_SCHEMES.update(
             "validator": always_valid,
             "datacite": "Other",
         },
+        "neh_user_id": {
+            "label": _("NEH user ID"),
+            "validator": always_valid,
+            "datacite": "Other",
+        },
+        "import_user_id": {
+            "label": _("Import user ID"),
+            "validator": always_valid,
+            "datacite": "Other",
+        },
     }
 )
 
@@ -1540,6 +1561,7 @@ SSO_SAML_IDPS = {
             "knowledgeCommons",
             account_info=knowledgeCommons_account_info,
             account_setup=knowledgeCommons_account_setup,
+            user_lookup=knowledgeCommons_account_get_user,
         ),
         # Automatically set `confirmed_at` for users upon
         # registration, when using the default `acs_handler`
@@ -1841,15 +1863,6 @@ COMMUNITIES_ROLES = [
         can_curate=True,
         can_view=True,
     ),
-    # dict(
-    #     name="administrator",
-    #     title=_("Administrator"),
-    #     description=_("Full administrative access to the entire community."),
-    #     can_manage_roles=["administrator", "manager", "curator", "reader"],
-    #     can_manage=True,
-    #     can_curate=True,
-    #     can_view=True,
-    # ),
     dict(
         name="owner",
         title=_("Owner"),
@@ -5672,6 +5685,7 @@ class CustomUserProfileSchema(Schema):
     identifier_email = fields.String()
     identifier_orcid = fields.String()
     identifier_kc_username = fields.String()
+    identifier_other = fields.String()
     unread_notifications = fields.String()
 
 
diff --git a/site/kcworks/services/accounts/saml.py b/site/kcworks/services/accounts/saml.py
index db69a5d0b..e607f7aaa 100644
--- a/site/kcworks/services/accounts/saml.py
+++ b/site/kcworks/services/accounts/saml.py
@@ -1,13 +1,81 @@
 from datetime import datetime, timezone
-from flask import current_app
+from flask import current_app, abort
+from flask_login import current_user
 from invenio_access.permissions import system_identity
-from invenio_accounts.models import User
+from invenio_accounts.models import User, UserIdentity
 from invenio_accounts.proxies import current_accounts
+from invenio_db import db
 from invenio_oauthclient.errors import AlreadyLinkedError
+from invenio_oauthclient.utils import (
+    create_csrf_disabled_registrationform,
+    fill_form,
+)
 from invenio_remote_user_data_kcworks.proxies import (
     current_remote_user_data_service,
 )
-from invenio_saml.invenio_accounts.utils import account_link_external_id
+from invenio_saml.invenio_accounts.utils import (
+    _get_external_id,
+    account_authenticate,
+    account_link_external_id,
+    account_register,
+)
+from invenio_saml.invenio_app import get_safe_redirect_target
+
+
+def knowledgeCommons_account_get_user(account_info=None):
+    """Find the existing user that matches the given SAML account info.
+
+    Extends invenio_saml.invenio_accounts.utils.account_get_user. Tries, in
+    order: the linked external id, the profile's ORCID, the KC username
+    (as a "<external_method>-<kc_username>" username, then as a profile
+    identifier), and finally the email address. The first match is returned.
+
+    :param account_info: The dictionary with the account info.
+        (Default: ``None``)
+    :returns: A :class:`invenio_accounts.models.User` instance, or ``None``
+        when ``account_info`` is empty or no lookup finds a user.
+    """
+    if account_info:
+        current_app.logger.debug(f"account_info: {account_info}")
+        external_id = _get_external_id(account_info)
+        if external_id:
+            user = UserIdentity.get_user(external_id["method"], external_id["id"])
+            if user:
+                return user
+
+        orcid = account_info.get("user", {}).get("profile", {}).get("identifier_orcid")
+        if orcid:
+            current_app.logger.debug(f"orcid: {orcid}")
+            orcid_match = User.query.filter(
+                User._user_profile.op("->>")("identifier_orcid") == orcid
+            ).one_or_none()
+            current_app.logger.debug(f"orcid_match: {orcid_match}")
+            if orcid_match:
+                return orcid_match
+        kc_username = (
+            account_info.get("user", {})
+            .get("profile", {})
+            .get("identifier_kc_username")
+        )
+        if kc_username:
+            current_app.logger.debug(f"kc_username: {kc_username}")
+            kc_username_match = User.query.filter_by(
+                username=f"{account_info['external_method']}-{kc_username}"
+            ).one_or_none()
+            if not kc_username_match:
+                kc_username_match = User.query.filter(
+                    User._user_profile.op("->>")("identifier_kc_username")
+                    == kc_username
+                ).one_or_none()
+            if kc_username_match:
+                return kc_username_match
+        email = account_info.get("user", {}).get("email")
+        if email:
+            current_app.logger.debug(f"email: {email}")
+            email_match = User.query.filter_by(email=email).one_or_none()
+            if email_match:
+                return email_match
+    return None
 
 
 def knowledgeCommons_account_setup(user: User, account_info: dict) -> bool:
@@ -27,6 +95,8 @@ def knowledgeCommons_account_setup(user: User, account_info: dict) -> bool:
         )
         if not user.active:
             assert current_accounts.datastore.activate_user(user)
+        if not user.confirmed_at:
+            assert current_accounts.datastore.verify_user(user)
         current_accounts.datastore.commit()
         current_remote_user_data_service.update_user_from_remote(
             system_identity,
@@ -36,6 +106,11 @@ def knowledgeCommons_account_setup(user: User, account_info: dict) -> bool:
         )
         return True
     except AlreadyLinkedError:
+        # FIXME: temporary fix to ensure older users are active and confirmed
+        if not user.active:
+            assert current_accounts.datastore.activate_user(user)
+        if not user.confirmed_at:
+            assert current_accounts.datastore.verify_user(user)
         current_remote_user_data_service.update_user_from_remote(
             system_identity,
             user.id,
@@ -77,13 +152,13 @@ def knowledgeCommons_account_info(attributes: dict, remote_app: str) -> dict:
         email = attributes.get(mappings["email"], [None])[0]
         affiliations = ""
 
-        if email is None:
-            remote_data: dict = current_remote_user_data_service.fetch_from_remote_api(
-                remote_app, external_id
-            )
-            print(f"Remote data: {remote_data}")
-            email: str = remote_data.get("users", {}).get("email", None)
-            assert email is not None
+        remote_data: dict = current_remote_user_data_service.fetch_from_remote_api(
+            remote_app, external_id
+        )
+        print(f"Remote data: {remote_data}")
+        orcid: str = remote_data.get("users", {}).get("orcid", None)
+        email: str = remote_data.get("users", {}).get("email", None)
+        assert email is not None
     except KeyError:
         raise ValueError(
             f"Missing required KC account username in SAML response from IDP: no "
@@ -95,21 +170,124 @@ def knowledgeCommons_account_info(attributes: dict, remote_app: str) -> dict:
             f"entity with key {mappings['email']} and fetch from KC api failed"
         )
 
+    profile_dict = dict(
+        username=username,  # shifted from profile to user by register form
+        full_name=name + " " + surname,
+        affiliations=affiliations,
+        identifier_kc_username=external_id.lower(),
+    )
+    if orcid:
+        profile_dict["identifier_orcid"] = orcid
+
     return dict(
         user=dict(
             email=email,
-            profile=dict(
-                username=username,
-                full_name=name + " " + surname,
-                affiliations=affiliations,
-            ),
+            profile=profile_dict,
         ),
         external_id=external_id,
         external_method=remote_app,
         active=True,
         confirmed_at=(
             datetime.now(timezone.utc)
-            if remote_app_config.get("auto_confirm", False)
+            if remote_app_config.get("auto_confirm", True)
             else None
         ),
     )
+
+
+def acs_handler_factory(
+    remote_app,
+    account_info=knowledgeCommons_account_info,
+    account_setup=knowledgeCommons_account_setup,
+    user_lookup=knowledgeCommons_account_get_user,
+):
+    """Generate an ACS handler with specific account info and setup functions.
+
+    .. note::
+
+        In 90% of the cases the ACS handler is going to be the same, only the
+        way the information is extracted and processed from the IdP will be
+        different.
+
+    :param remote_app: string representing the name of the identity provider.
+
+    :param account_info: callable to extract the account information from a
+        dict-like object. The ``mappings`` key is required when using it.
+        This function is expected to return a dictionary similar to this:
+
+        .. code-block:: python
+
+            dict(
+                user=dict(
+                    email='federico@example.com',
+                    profile=dict(username='federico',
+                                 full_name='Federico Fernandez'),
+                ),
+                external_id='12345679abcdf',
+                external_method='example',
+                active=True
+             )
+
+        Where ``external_id`` is the ID provided by the IdP and
+        ``external_method`` is the name of the IdP as in the configuration
+        file (not mandatory but recommended).
+
+    :param account_setup: callable to setup the user account with the
+        corresponding IdP account information. Typically this means creating a
+        new row under ``UserIdentity`` and maybe extending  ``g.identity``.
+
+    :param user_lookup: callable to retrieve any user whose information matches
+        what is returned by the `account_info` callable. This then returns a
+        User object if a match is present and None if no match is found.
+
+    :return: function to be used as ACS handler
+    """
+
+    def default_acs_handler(auth, next_url):
+        """Default ACS handler.
+
+        :param auth: A :class:`invenio_saml.utils.SAMLAuth` instance.
+        :param next_url: String with the next URL to redirect to.
+
+        :return: Next URL
+        """
+        current_app.logger.debug("ACS handler called")
+        current_app.logger.debug(
+            "Current user is authenticated: %s", current_user.is_authenticated
+        )
+        if not current_user.is_authenticated:
+            current_app.logger.debug(
+                "Metadata received from IdP %s", auth.get_attributes()
+            )
+            _account_info = account_info(auth.get_attributes(), remote_app)
+            current_app.logger.debug("Metadata extracted from IdP %s", _account_info)
+            # TODO: signals?
+            current_app.logger.debug(
+                f"OAUTHCLIENT_SIGNUP_FORM: {current_app.config['OAUTHCLIENT_SIGNUP_FORM']}"
+            )
+
+            user = user_lookup(_account_info)
+            current_app.logger.debug(f"user: {user}")
+
+            if user is None:
+                form = create_csrf_disabled_registrationform(remote_app)
+                form = fill_form(form, _account_info["user"])
+                user = account_register(
+                    form, confirmed_at=_account_info["confirmed_at"]
+                )
+
+            # if registration fails ... TODO: signup?
+            if user is None or not account_authenticate(user):
+                abort(401)
+
+            account_setup(user, _account_info)
+
+        db.session.commit()  # type: ignore
+
+        next_url = (
+            get_safe_redirect_target(_target=next_url)
+            or current_app.config["SECURITY_POST_LOGIN_VIEW"]
+        )
+        return next_url
+
+    return default_acs_handler
diff --git a/site/kcworks/templates/semantic-ui/invenio_notifications/user-first-record.create.jinja b/site/kcworks/templates/semantic-ui/invenio_notifications/user-first-record.create.jinja
index 0234894da..e611218b4 100644
--- a/site/kcworks/templates/semantic-ui/invenio_notifications/user-first-record.create.jinja
+++ b/site/kcworks/templates/semantic-ui/invenio_notifications/user-first-record.create.jinja
@@ -14,39 +14,43 @@
 {%- endblock subject -%}
 
 {%- block html_body -%}
+
+<h2>{{ _("KCWorks moderation notice:") }}</h2>
+<p>{{ _("A new user has created their first draft.") }}</p>
 <table style="font-family:'Lato',Helvetica,Arial,sans-serif;border-spacing:15px">
     <tr>
-        <td>
-           {{ _("A new user has created their first draft.") }}
-        </td>
-    </tr>
-    <tr>
-        <td>{{ _("User name: {user_name}").format(user_name=submitter_name) }}</td>
+        <td><b>{{ _("User name") }}</b></td>
+        <td>{{ submitter_name }}</td>
     </tr>
     <tr>
-        <td>{{ _("User email: {user_email}").format(user_email=submitter_email) }}</td>
+        <td><b>{{ _("User email") }}</b></td>
+        <td>{{ submitter_email }}</td>
     </tr>
     <tr>
-        <td>{{ _("User ID: {user_id}").format(user_id=submitter_id) }}</td>
+        <td><b>{{ _("User ID") }}</b></td>
+        <td>{{ submitter_id }}</td>
     </tr>
     <tr>
-        <td>{{ _("Draft title: {record_title}").format(record_title=record_title) }}</td>
+        <td><b>{{ _("Draft title") }}</b></td>
+        <td>{{ record_title }}</td>
     </tr>
     <tr>
-        <td>
-            {{ _("Draft ID: {draft_id}").format(draft_id=draft_id) }} (<a href='{{ config.get("SITE_UI_URL") }}/records/{{ draft_id }}'>{{ _("View draft") }}</a>)
+        <td><b>{{ _("Draft ID") }}</b></td>
+        <td>{{ draft_id }} (<a href='{{ config.get("SITE_UI_URL") }}/records/{{ draft_id }}'>{{ _("View draft") }}</a>)
         </td>
     </tr>
-    <tr>
+    {# <tr>
         <td>{{ _("Full metadata:") }}</td>
     </tr>
     <tr>
         <td>{{ notification.context.data | tojson(indent=2) | safe }}</td>
-    </tr>
+    </tr> #}
 </table>
 {%- endblock html_body %}
 
 {%- block plain_body -%}
+{{ _("KCWorks moderation notice:") }}
+
 {{ _("A new user has created their first draft.") }}
 
 {{ _("User name: {user_name}").format(user_name=submitter_name) }}
@@ -59,14 +63,16 @@
 
 {{ _("Draft ID: {draft_id}").format(draft_id=draft_id) }} ({{ config.get('SITE_UI_URL') }}/records/{{ draft_id }})
 
-{{ _("Full metadata:") }}
+{# {{ _("Full metadata:") }}
 
-{{ notification.context.data | tojson(indent=2) | safe }}
+{{ notification.context.data | tojson(indent=2) | safe }} #}
 
 {%- endblock plain_body %}
 
 {# Markdown for Slack/Mattermost/chat #}
 {%- block md_body -%}
+{{ _("KCWorks moderation notice:") }}
+
 {{ _("A new user has created their first draft.") }}
 
 {{ _("User name: {user_name}").format(user_name=submitter_name) }}
@@ -79,7 +85,7 @@
 
 {{ _("Draft ID: {draft_id}").format(draft_id=draft_id) }} [View draft]({{ config.get('SITE_UI_URL') }}/records/{{ draft_id }})
 
-{{ _("Full metadata:") }}
+{# {{ _("Full metadata:") }}
 
-{{ notification.context.data | tojson(indent=2) | safe }}
+{{ notification.context.data | tojson(indent=2) | safe }} #}
 {%- endblock md_body %}
\ No newline at end of file
diff --git a/site/kcworks/templates/semantic-ui/invenio_notifications/user-first-record.publish.jinja b/site/kcworks/templates/semantic-ui/invenio_notifications/user-first-record.publish.jinja
index f81a763c3..20bb8fa00 100644
--- a/site/kcworks/templates/semantic-ui/invenio_notifications/user-first-record.publish.jinja
+++ b/site/kcworks/templates/semantic-ui/invenio_notifications/user-first-record.publish.jinja
@@ -15,42 +15,42 @@
 {%- endblock subject -%}
 
 {%- block html_body -%}
+<h2>{{ _("KCWorks moderation notice:") }}</h2>
+<p>{{ _("A new user has published their first work.") }}</p>
 <table style="font-family:'Lato',Helvetica,Arial,sans-serif;border-spacing:15px">
     <tr>
-        <td>
-           {{ _("A new user has published their first work.") }}
-        </td>
-    </tr>
-    <tr>
-        <td>{{ _("User name: {user_name}").format(user_name=submitter_name) }}</td>
+        <td><b>{{ _("User name") }}</b></td>
+        <td>{{ submitter_name }}</td>
     </tr>
     <tr>
-        <td>{{ _("User email: {user_email}").format(user_email=submitter_email) }}</td>
+        <td><b>{{ _("User email") }}</b></td>
+        <td>{{ submitter_email }}</td>
     </tr>
     <tr>
-        <td>{{ _("User ID: {user_id}").format(user_id=submitter_id) }}</td>
+        <td><b>{{ _("User ID") }}</b></td>
+        <td>{{ submitter_id }}</td>
     </tr>
     <tr>
-        <td>{{ _("Work title: {record_title}").format(record_title=record_title) }}</td>
+        <td><b>{{ _("Work title") }}</b></td>
+        <td>{{ record_title }}</td>
     </tr>
     <tr>
-        <td>
-            {{ _("Work ID: {record_id}").format(record_id=record_id) }}
-            (<a href="{{ config.get('SITE_UI_URL') }}/records/{{ record_id }}">
-                {{ _("View work") }}
-            </a>)
+        <td><b>{{ _("Work ID") }}</b></td>
+        <td>{{ record_id }} (<a href="{{ config.get('SITE_UI_URL') }}/records/{{ record_id }}">{{ _("View work") }}</a>)
         </td>
     </tr>
-    <tr>
-        <td>{{ _("Full metadata:") }}</td>
+    {# <tr>
+        <td><b>{{ _("Full metadata") }}</b></td>
     </tr>
     <tr>
         <td>{{ notification.context.record | tojson(indent=2) | safe }}</td>
-    </tr>
+    </tr> #}
 </table>
 {%- endblock html_body %}
 
 {%- block plain_body -%}
+{{ _("KCWorks moderation notice:") }}
+
 {{ _("A new user has published their first work.") }}
 
 {{ _("User name: {user_name}").format(user_name=submitter_name) }}
@@ -66,14 +66,16 @@
 {{ _("Work ID: {record_id}").format(record_id=record_id) }}
 ({{ config.get('SITE_UI_URL') }}/records/{{ record_id }})
 
-{{ _("Full metadata:") }}
+{# {{ _("Full metadata:") }}
 
-{{ notification.context.record | tojson(indent=2) | safe }}
+{{ notification.context.record | tojson(indent=2) | safe }} #}
 
 {%- endblock plain_body %}
 
 {# Markdown for Slack/Mattermost/chat #}
 {%- block md_body -%}
+{{ _("KCWorks moderation notice:") }}
+
 {{ _("A new user has published their first work.") }}
 
 {{ _("User name: {user_name}").format(user_name=submitter_name) }}
@@ -86,7 +88,7 @@
 
 {{ _("Work ID: {record_id}").format(record_id=record_id) }} [View work]({{ config.get('SITE_UI_URL') }}/records/{{ record_id }})
 
-{{ _("Full metadata:") }}
+{# {{ _("Full metadata:") }}
 
-{{ notification.context.record | tojson(indent=2) | safe }}
+{{ notification.context.record | tojson(indent=2) | safe }} #}
 {%- endblock md_body %}
\ No newline at end of file
diff --git a/site/tests/api/test_accounts.py b/site/tests/api/test_accounts.py
index d5172c646..685205e64 100644
--- a/site/tests/api/test_accounts.py
+++ b/site/tests/api/test_accounts.py
@@ -1,13 +1,16 @@
+import copy
+from pprint import pformat
 import pytest
 import datetime
 from flask import Flask
 from invenio_accounts import current_accounts
 from invenio_accounts.models import User
-from invenio_saml.handlers import acs_handler_factory
 import json
 from kcworks.services.accounts.saml import (
     knowledgeCommons_account_info,
     knowledgeCommons_account_setup,
+    knowledgeCommons_account_get_user,
+    acs_handler_factory,
 )
 import pytz
 from requests_mock.adapter import _Matcher as Matcher
@@ -18,37 +21,32 @@
 
 
 @pytest.mark.parametrize(
-    "attributes,output,user_data,api_call_count",
+    "attributes,output,user_data",
     [
         (
             idp_responses["joanjett"]["raw_data"],
             idp_responses["joanjett"]["extracted_data"],
             user_data_set["joanjett"],
-            0,
         ),
         (
             idp_responses["user1"]["raw_data"],
             idp_responses["user1"]["extracted_data"],
             user_data_set["user1"],
-            0,
         ),
         (
             idp_responses["user2"]["raw_data"],
             idp_responses["user2"]["extracted_data"],
             user_data_set["user2"],
-            0,
         ),
         (
             idp_responses["user3"]["raw_data"],
             idp_responses["user3"]["extracted_data"],
             user_data_set["user3"],
-            1,
         ),
         (
             idp_responses["user4"]["raw_data"],
             idp_responses["user4"]["extracted_data"],
             user_data_set["user4"],
-            0,
         ),
     ],
 )
@@ -61,7 +59,6 @@ def test_knowledgeCommons_account_info(
     user_data: dict,
     mock_user_data_api: Callable,
     user_data_to_remote_data: Callable,
-    api_call_count: int,
 ) -> None:
     """
     Test the custom handler
@@ -75,12 +72,8 @@ def test_knowledgeCommons_account_info(
     info: dict = knowledgeCommons_account_info(
         attributes, remote_app="knowledgeCommons"
     )
-    if api_call_count == 1:  # Here the api is only called if no email is provided
-        assert mock_adapter.called
-        assert mock_adapter.call_count == 1
-    else:
-        assert not mock_adapter.called
-        assert mock_adapter.call_count == 0
+    assert mock_adapter.called
+    assert mock_adapter.call_count == 1
 
     expected_result_email: str = (
         output["user"]["email"]
@@ -97,12 +90,145 @@ def test_knowledgeCommons_account_info(
     assert info["user"]["profile"]["username"] == output["user"]["profile"]["username"]
     assert info["external_id"] == output["external_id"]
     assert info["external_method"] == output["external_method"]
+    assert info["user"]["profile"].get("identifier_orcid", "") == output["user"][
+        "profile"
+    ].get("identifier_orcid", "")
+    assert info["user"]["profile"].get("identifier_kc_username", "") == output["user"][
+        "profile"
+    ].get("identifier_kc_username", "")
     assert info["active"] == output["active"]
     assert datetime.datetime.now(tz=pytz.timezone("US/Eastern")) - info[
         "confirmed_at"
     ] < datetime.timedelta(seconds=10)
 
 
+@pytest.mark.parametrize(
+    "original_email,original_orcid,original_kc_username,"
+    "user_data,idp_data,already_linked,user_expected",
+    [
+        (  # pre-existing user with same email and ORCID
+            user_data_set["user1"]["email"],
+            user_data_set["user1"]["orcid"],
+            user_data_set["user1"]["saml_id"],
+            user_data_set["user1"],
+            idp_responses["user1"]["extracted_data"],
+            False,
+            True,
+        ),
+        (  # pre-existing user with same email and empty ORCID
+            user_data_set["user1"]["email"],
+            "",
+            user_data_set["user1"]["saml_id"],
+            user_data_set["user1"],
+            idp_responses["user1"]["extracted_data"],
+            False,
+            True,
+        ),
+        (  # pre-existing user with different email and same ORCID
+            "other@example.com",
+            user_data_set["user1"]["orcid"],
+            user_data_set["user1"]["saml_id"],
+            user_data_set["user1"],
+            idp_responses["user1"]["extracted_data"],
+            False,
+            True,
+        ),
+        (  # already linked user with same email and ORCID
+            user_data_set["user1"]["email"],
+            user_data_set["user1"]["orcid"],
+            user_data_set["user1"]["saml_id"],
+            user_data_set["user1"],
+            idp_responses["user1"]["extracted_data"],
+            True,
+            True,
+        ),
+        (  # pre-existing user with different email and empty ORCID but KC username
+            "other@example.com",
+            "",
+            user_data_set["user1"]["saml_id"],
+            user_data_set["user1"],
+            idp_responses["user1"]["extracted_data"],
+            False,
+            True,
+        ),
+        (  # pre-existing user with different email, empty ORCID, and empty KC username
+            "other@example.com",
+            "",
+            "",
+            user_data_set["user1"],
+            idp_responses["user1"]["extracted_data"],
+            False,
+            False,
+        ),
+    ],
+)
+def test_knowledgeCommons_account_get_user(
+    running_app,
+    appctx,
+    db,
+    user_factory: Callable,
+    original_email: str,
+    original_orcid: str,
+    original_kc_username: str,
+    user_data: dict,
+    idp_data: dict,
+    already_linked: bool,
+    user_expected: bool,
+    mock_user_data_api: Callable,
+    user_data_to_remote_data: Callable,
+) -> None:
+    """
+    Test the account get user function, which should match a SAML login based on
+    email, ORCID, or KC username.
+
+    case 1: The pre-existing KCWorks user has the same email as the IDP response
+    case 2: The pre-existing KCWorks user has a different email from the IDP response
+    case 3: The KCWorks user is already linked to an external ID
+    """
+    app: Flask = running_app.app
+
+    if not already_linked:
+        u: AugmentedUserFixture = user_factory(
+            email=original_email,
+            password="password",
+            saml_id=None,
+            orcid=original_orcid,
+            kc_username=original_kc_username,
+        )
+        assert not u.mock_adapter
+    else:
+        u: AugmentedUserFixture = user_factory(
+            email=original_email,
+            password="password",
+            saml_src="knowledgeCommons",
+            saml_id=user_data["saml_id"],
+            new_remote_data=user_data,
+        )
+    assert u.user is not None
+    app.logger.debug(f"user profile: {u.user.user_profile}")
+
+    matched_user: Optional[User] = knowledgeCommons_account_get_user(idp_data)
+    app.logger.debug(f"matched user: {pformat(matched_user)}")
+
+    if user_expected:
+        assert matched_user is not None
+        assert matched_user.id == u.user.id
+        # email, username, identifier_orcid, identifier_kc_username are not
+        # updated yet on returned User object
+        assert matched_user.email == original_email
+        assert matched_user.username == (
+            None if not already_linked else f"knowledgeCommons-{user_data['saml_id']}"
+        )
+        assert matched_user.user_profile.get("identifier_orcid") == (
+            original_orcid if original_orcid != "" else None
+        )  # not updated yet
+        assert matched_user.user_profile.get("identifier_kc_username") == (
+            original_kc_username if original_kc_username != "" else None
+        )
+    else:
+        assert matched_user is None
+
+
 @pytest.mark.parametrize(
     "user_data,idp_data",
     [
@@ -183,15 +309,15 @@ def test_knowledgeCommons_account_setup(
 @pytest.mark.parametrize(
     "idp_data,user_data,api_call_count",
     [
-        (idp_responses["joanjett"]["raw_data"], user_data_set["joanjett"], 1),
-        (idp_responses["user1"]["raw_data"], user_data_set["user1"], 1),
-        (idp_responses["user2"]["raw_data"], user_data_set["user2"], 1),
+        (idp_responses["joanjett"]["raw_data"], user_data_set["joanjett"], 2),
+        (idp_responses["user1"]["raw_data"], user_data_set["user1"], 2),
+        (idp_responses["user2"]["raw_data"], user_data_set["user2"], 2),
         (
             idp_responses["user3"]["raw_data"],
             user_data_set["user3"],
             2,
-        ),  # IDP response has no email
-        (idp_responses["user4"]["raw_data"], user_data_set["user4"], 1),
+        ),  # IDP response has no email (now making request for everyone)
+        (idp_responses["user4"]["raw_data"], user_data_set["user4"], 2),
     ],
 )
 def test_account_register_on_login(
@@ -259,10 +385,12 @@ def test_account_register_on_login(
     assert user.external_identifiers[0].id == user_data["saml_id"]
     assert user.external_identifiers[0].id_user == user.id
     assert user.external_identifiers[0].method == "knowledgeCommons"
-    assert [r.name for r in user.roles] == (
+    expected_roles = (
         [f"knowledgeCommons---{g['id']}|{g['role']}" for g in user_data["groups"]]
         if "groups" in user_data.keys()
         else []
     )
+    assert all([r for r in user.roles if r.name in expected_roles])
+    assert not any([r for r in user.roles if r.name not in expected_roles])
 
     assert next_url == "https://localhost/next-url.com"
diff --git a/site/tests/api/test_api_import.py b/site/tests/api/test_api_import.py
index 25bc3ecc6..5cec08561 100644
--- a/site/tests/api/test_api_import.py
+++ b/site/tests/api/test_api_import.py
@@ -1,77 +1,1362 @@
-from invenio_access.permissions import authenticated_user
+import copy
+from flask_login import login_user
+from invenio_access.permissions import authenticated_user, system_identity
 from invenio_access.utils import get_identity
+from invenio_accounts.models import User
+from invenio_accounts.proxies import current_accounts
+from invenio_communities.members.records.api import Member
+from invenio_communities.utils import load_community_needs
+from invenio_rdm_records.proxies import current_rdm_records_service as records_service
+from invenio_record_importer_kcworks.proxies import current_record_importer_service
+from invenio_record_importer_kcworks.record_loader import RecordLoader
+from invenio_record_importer_kcworks.types import (
+    FileData,
+    LoaderResult,
+)
+from invenio_vocabularies.proxies import current_service as current_vocabulary_service
+from invenio_vocabularies.records.api import Vocabulary
 import json
 from pathlib import Path
 from pprint import pformat
+import re
+import sys
+from typing import Optional
+from ..fixtures.communities import make_community_member
+from ..fixtures.files import file_md5
+from ..fixtures.records import TestRecordMetadata, TestRecordMetadataWithFiles
+from ..helpers.sample_records import (
+    sample_metadata_chapter_pdf,
+    sample_metadata_chapter2_pdf,
+    # sample_metadata_chapter3_pdf,
+    # sample_metadata_chapter4_pdf,
+    # sample_metadata_chapter5_pdf,
+    # sample_metadata_conference_proceedings_pdf,
+    # sample_metadata_interview_transcript_pdf,
+    sample_metadata_journal_article_pdf,
+    sample_metadata_journal_article2_pdf,
+    # sample_metadata_thesis_pdf,
+    # sample_metadata_white_paper_pdf,
+)
 
 
-def test_import_records(
-    running_app,
-    db,
-    client_with_login,
-    minimal_community_factory,
-    user_factory,
-    minimal_record_metadata,
-    search_clear,
-    mock_send_remote_api_update_fixture,
-):
-    app = running_app.app
-    community = minimal_community_factory()
-    u = user_factory(email="test@example.com", token=True, saml_id=None)
-    token = u.allowed_token
-    identity = get_identity(u.user)
-    identity.provides.add(authenticated_user)
-
-    file_path = (
-        Path(__file__).parent.parent.parent / "tests/helpers/sample_files/sample.pdf"
-    )
-    file_list = [{"key": "sample.pdf"}]
-    minimal_record_metadata["files"] = {"enabled": True, "entries": file_list}
-
-    with app.test_client() as client:
-        with open(
-            file_path,
-            "rb",
-        ) as binary_file_data:
-            binary_file_data.seek(0)
-            response = client.post(
-                f"{app.config['SITE_API_URL']}/import/{community.to_dict()['slug']}",
+class BaseImportLoaderTest:
+    """Base class for testing record imports with different metadata sources."""
+
+    @property
+    def metadata_source(self):
+        """Override this in subclasses to provide specific metadata."""
+        raise NotImplementedError
+
+    def modify_metadata(self, test_metadata: TestRecordMetadata):
+        """Modify the metadata in the metadata source class instance."""
+        pass
+
+    def check_result_status(self, result: LoaderResult):
+        """Check the status of the result."""
+        assert result.status == "new_record"
+
+    def check_result_record_created(
+        self, result: LoaderResult, test_metadata: TestRecordMetadata
+    ):
+        """Do the comparison of the result with the expected metadata."""
+        assert test_metadata.compare_published(result.record_created["record_data"])
+        assert result.record_created["record_data"]["revision_id"] == 3
+
+        assert re.match(
+            r"^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$",
+            str(result.record_created["record_uuid"]),
+        )
+        assert result.record_created["status"] == "new_record"
+
+    def check_result_primary_community(self, result: LoaderResult, community: dict):
+        """Check the primary community of the result."""
+        assert result.primary_community["id"] == community["id"]
+        assert result.primary_community["metadata"]["title"] == "My Community"
+        assert result.primary_community["slug"] == "my-community"
+
+    def check_result_existing_record(self, result: LoaderResult):
+        """Check the existing record of the result."""
+        assert result.existing_record == {}
+
+    def check_result_uploaded_files(self, result: LoaderResult):
+        """Check the uploaded files of the result."""
+        assert result.uploaded_files == {}
+
+    def check_result_community_review_result(
+        self, result: LoaderResult, community: dict, test_metadata: TestRecordMetadata
+    ):
+        """Check the community review result of the result."""
+        assert result.community_review_result["is_closed"]
+        assert not result.community_review_result["is_expired"]
+        assert not result.community_review_result["is_open"]
+        assert (
+            result.community_review_result["receiver"]["community"] == community["id"]
+        )
+        assert result.community_review_result["revision_id"] == 4
+        assert result.community_review_result["status"] == "accepted"
+        assert (
+            result.community_review_result["title"]
+            == test_metadata.metadata_in["metadata"]["title"]
+        )
+        assert (
+            result.community_review_result["topic"]["record"]
+            == result.record_created["record_data"]["id"]
+        )
+        assert result.community_review_result["type"] == "community-submission"
+
+    def check_result_assigned_owners(
+        self,
+        result: LoaderResult,
+        user_id: str,
+        test_metadata: TestRecordMetadata,
+        app,
+    ):
+        """Check the assigned owners of the result."""
+        owners = (
+            test_metadata.metadata_in.get("parent", {})
+            .get("access", {})
+            .get("owned_by")
+        )
+        app.logger.debug(f"check_result_assigned_owners: {pformat(owners)}")
+        if owners and result.status == "new_record":
+            owners = [
+                current_accounts.datastore.get_user_by_email(owner["email"])
+                for owner in owners
+            ]
+            app.logger.debug(f"check_result_assigned_owners Users: {pformat(owners)}")
+            assert result.assigned_owners == {
+                "owner_email": owners[0].email,
+                "owner_id": owners[0].id,
+                "owner_type": "user",
+                "access_grants": [
+                    {
+                        "subject": {
+                            "id": str(owner.id),
+                            "type": "user",
+                            "email": owner.email,
+                        },
+                        "permission": "manage",
+                    }
+                    for owner in owners[1:]
+                ],
+            }
+        elif result.status == "new_record":
+            assert result.assigned_owners == {
+                "owner_id": user_id,
+                "owner_email": "test@example.com",
+                "owner_type": "user",
+                "access_grants": [],
+            }
+        else:
+            assert result.assigned_owners == {}
+
+    def check_result_added_to_collections(self, result: LoaderResult):
+        """Check the ``added_to_collections`` field of the result."""
+        assert result.added_to_collections == []
+
+    def check_result_submitted(
+        self,
+        result: LoaderResult,
+        test_metadata: TestRecordMetadata,
+        app,
+    ):
+        """Check the submitted data, files and owners of the result."""
+        submitted_data = copy.deepcopy(test_metadata.metadata_in)
+        # Remove the owned_by field from the access dictionary during
+        # record creation because we will be adding it back in later
+        if submitted_data.get("parent", {}).get("access", {}).get("owned_by"):
+            submitted_data["parent"]["access"].pop("owned_by")
+        # Remove the entries field from the files dictionary during
+        # record creation because we will be adding it back in later
+        if submitted_data.get("files", {}).get("entries"):
+            submitted_data["files"].pop("entries")
+        # Add an empty "access" field to the expected submitted data
+        # if it wasn't present in the sample data, since it gets added
+        # by the loader
+        if not submitted_data.get("access"):
+            submitted_data["access"] = {}
+        assert result.submitted["data"] == submitted_data
+        # The test sometimes adds checksums and ids to the input file list
+        # so we need to remove them for the comparison
+        submitted_files = copy.deepcopy(test_metadata.metadata_in["files"])
+        if submitted_files.get("entries"):
+            submitted_files["entries"] = {
+                k: {k: v for k, v in v.items() if k != "checksum" and k != "id"}
+                for k, v in submitted_files["entries"].items()
+            }
+        assert result.submitted["files"] == submitted_files
+        assert result.submitted["owners"] == test_metadata.metadata_in.get(
+            "parent", {}
+        ).get("access", {}).get("owned_by", [])
+
+    def check_result_errors(self, result: LoaderResult):
+        """Check the errors of the result."""
+        assert result.errors == []
+
+    def test_import_records_loader_load(
+        self,
+        db,
+        running_app,
+        search_clear,
+        minimal_community_factory,
+        user_factory,
+        record_metadata,
+        mock_send_remote_api_update_fixture,
+        celery_worker,
+    ):
+        app = running_app.app
+
+        # read the "textDocument-journalArticle" resource type record
+        rt = current_vocabulary_service.read(
+            system_identity,
+            id_=("resourcetypes", "textDocument-journalArticle"),
+        )
+        app.logger.debug(f"textDocument rec: {pformat(rt.to_dict())}")
+
+        Vocabulary.index.refresh()
+
+        # Search for all resourcetypes
+        search_result = current_vocabulary_service.search(
+            system_identity,
+            type="resourcetypes",
+        )
+        app.logger.debug(f"search_result: {pformat(search_result.to_dict())}")
+
+        # Get the hits from the search result
+        resource_types = search_result.to_dict()["hits"]["hits"]
+
+        # Log each resource type
+        for rt in resource_types:
+            app.logger.debug(
+                f"resource type: ID: {rt['id']}, Title: {rt['title']['en']}"
+            )
+
+        # Get the email of the first owner of the record if owners are specified
+        owners = (
+            self.metadata_source.get("parent", {}).get("access", {}).get("owned_by", [])
+        )
+        if owners:
+            first_user_email = owners[0].get("email")
+        else:
+            first_user_email = "test@example.com"
+        u = user_factory(email=first_user_email, token=True, saml_id=None)
+        user_id = u.user.id
+        identity = get_identity(u.user)
+        identity.provides.add(authenticated_user)
+        login_user(u.user)
+
+        community_record = minimal_community_factory(owner=user_id)
+        community = community_record.to_dict()
+
+        test_metadata = record_metadata(
+            metadata_in=self.metadata_source,
+            community_list=[community],
+            owner_id=user_id,
+        )
+        test_metadata.update_metadata(
+            {
+                "metadata|identifiers": [
+                    {"identifier": "1234567890", "scheme": "import-recid"}
+                ]
+            }
+        )
+        self.modify_metadata(test_metadata)
+
+        for u in (
+            test_metadata.metadata_in.get("parent", {})
+            .get("access", {})
+            .get("owned_by", [])
+        ):
+            if u["email"] != "test@example.com":
+                user_factory(
+                    email=u["email"],
+                    token=False,
+                    saml_id=None,
+                )
+
+        result: LoaderResult = RecordLoader(
+            user_id=user_id, community_id=community["id"]
+        ).load(index=0, import_data=copy.deepcopy(test_metadata.metadata_in))
+
+        assert result.log_object
+        assert result.source_id
+        self.check_result_submitted(result, test_metadata, app)
+        self.check_result_record_created(result, test_metadata)
+        self.check_result_status(result)
+        self.check_result_primary_community(result, community)
+        self.check_result_existing_record(result)
+        self.check_result_uploaded_files(result)
+
+        community.update({"links": {}})  # FIXME: Why are links not expanded?
+
+        self.check_result_community_review_result(result, community, test_metadata)
+        self.check_result_assigned_owners(result, user_id, test_metadata, app)
+        self.check_result_added_to_collections(result)
+        self.check_result_errors(result)
+
+
+# class TestImportLoaderLoadThesisPDF(BaseImportRecordsLoaderLoadTest):
+#     @property
+#     def metadata_source(self):
+#         return sample_metadata_thesis_pdf["input"]
+
+
+# class TestImportLoaderLoadChapterPDF(BaseImportRecordsLoaderLoadTest):
+#     @property
+#     def metadata_source(self):
+#         return sample_metadata_chapter_pdf["input"]
+
+
+# class TestImportLoaderLoadChapter2PDF(BaseImportRecordsLoaderLoadTest):
+#     @property
+#     def metadata_source(self):
+#         return sample_metadata_chapter2_pdf["input"]
+
+
+class TestImportLoaderJArticle(BaseImportLoaderTest):
+    """Run the base loader test against the sample journal article metadata."""
+
+    @property
+    def metadata_source(self):
+        """Return a private copy of the sample journal article input metadata."""
+        sample_input = sample_metadata_journal_article_pdf["input"]
+        return copy.deepcopy(sample_input)
+
+
+class BaseImportLoaderErrorTest(BaseImportLoaderTest):
+    """Shared expectations for loader tests whose import is meant to fail.
+
+    Overrides the result hooks so that the status is "error", the created
+    record is reported as deleted and the community review result is empty.
+    """
+
+    def check_result_status(self, result: LoaderResult):
+        """Expect the loader to report an error status."""
+        expected_status = "error"
+        assert result.status == expected_status
+
+    def check_result_record_created(
+        self, result: LoaderResult, test_metadata: TestRecordMetadata
+    ):
+        """Expect an empty record placeholder marked as deleted."""
+        deleted_placeholder = {
+            "record_data": {},
+            "record_uuid": "",
+            "status": "deleted",
+        }
+        assert result.record_created == deleted_placeholder
+
+    def check_result_community_review_result(
+        self, result: LoaderResult, community: dict, test_metadata: TestRecordMetadata
+    ):
+        """Expect that no community review result was recorded."""
+        review_result = result.community_review_result
+        assert review_result == {}
+
+
+class TestImportLoaderJArticleErrorTitle(BaseImportLoaderErrorTest):
+    """Journal article import that must fail because the title is empty."""
+
+    @property
+    def metadata_source(self):
+        """Return a private copy of the sample journal article input metadata."""
+        sample_input = sample_metadata_journal_article_pdf["input"]
+        return copy.deepcopy(sample_input)
+
+    def modify_metadata(self, test_metadata: TestRecordMetadata):
+        """Blank out the title so that validation rejects the record."""
+        blank_title = {"metadata|title": ""}
+        test_metadata.update_metadata(blank_title)
+
+    def check_result_errors(self, result: LoaderResult):
+        """Expect a single required-field validation error on the title."""
+        title_error = {
+            "validation_error": {
+                "metadata": {"title": ["Missing data for required field."]}
+            }
+        }
+        assert result.errors == [title_error]
+
+
+class TestImportLoaderJArticleErrorIDScheme(BaseImportLoaderErrorTest):
+    """Test importing a journal article with an unknown identifier scheme."""
+
+    @property
+    def metadata_source(self):
+        """Return a private copy of the sample journal article input metadata."""
+        return copy.deepcopy(sample_metadata_journal_article_pdf["input"])
+
+    def modify_metadata(self, test_metadata: TestRecordMetadata):
+        """Replace the identifiers so the first one uses a made-up scheme.
+
+        The second identifier keeps the "import-recid" scheme.
+        """
+        test_metadata.update_metadata(
+            {
+                "metadata|identifiers": [
+                    {"identifier": "hc:33383", "scheme": "my-made-up-scheme"},
+                    {"identifier": "1234567890", "scheme": "import-recid"},
+                ]
+            }
+        )
+
+    def check_result_errors(self, result: LoaderResult):
+        """Expect an "Invalid scheme." error on the first identifier only."""
+        assert result.errors == [
+            {
+                "validation_error": {
+                    "metadata": {"identifiers": {0: {"scheme": "Invalid scheme."}}}
+                }
+            }
+        ]
+
+
+class BaseImportLoaderWithFilesTest(BaseImportLoaderTest):
+    """Base class for testing record imports with files.
+
+    Subclasses supply ``metadata_source``. The test uploads ``sample.pdf`` and
+    ``sample.jpg`` together with the metadata, checks the LoaderResult and
+    then verifies that both files can be downloaded again.
+    """
+
+    def check_result_uploaded_files(self, result: LoaderResult):
+        """Check that both sample files are reported as uploaded, error-free."""
+        assert result.uploaded_files == {
+            "sample.jpg": ["uploaded", []],
+            "sample.pdf": ["uploaded", []],
+        }
+
+    def check_result_record_created(
+        self, result: LoaderResult, test_metadata: TestRecordMetadata
+    ):
+        """Check the created record data against the expected published form."""
+        assert test_metadata.compare_published(result.record_created["record_data"])
+        # assert result.record_created["record_data"]["revision_id"] == 4
+        # FIXME: sometimes 3, sometimes 4
+
+    def test_import_records_loader_load(
+        self,
+        running_app,
+        db,
+        search_clear,
+        minimal_community_factory,
+        user_factory,
+        record_metadata_with_files,
+        mock_send_remote_api_update_fixture,
+        celery_worker,
+    ):
+        """Load one record with two files and verify the result and downloads.
+
+        The upload streams are opened in a ``with`` block around the loader
+        call so that they are closed even when setup or the loader raises.
+        """
+        app = running_app.app
+
+        # find the resource type id for "textDocument"
+        rt = current_vocabulary_service.read(
+            system_identity,
+            id_=("resourcetypes", "textDocument-journalArticle"),
+        )
+        app.logger.debug(f"textDocument rec: {pformat(rt.to_dict())}")
+
+        Vocabulary.index.refresh()
+        # Search for all resourcetypes
+        search_result = current_vocabulary_service.search(
+            system_identity,
+            type="resourcetypes",
+        )
+        app.logger.debug(f"search_result: {pformat(search_result.to_dict())}")
+
+        # Log each resource type found by the search
+        for resource_type in search_result.to_dict()["hits"]["hits"]:
+            app.logger.debug(
+                f"resource type: ID: {resource_type['id']}, "
+                f"Title: {resource_type['title']['en']}"
+            )
+
+        u = user_factory(email="test@example.com", token=True, saml_id=None)
+        user_id = u.user.id
+        identity = get_identity(u.user)
+        identity.provides.add(authenticated_user)
+        login_user(u.user)
+
+        community_record = minimal_community_factory(owner=user_id)
+        community = community_record.to_dict()
+
+        # Build each sample path once and reuse it for upload and download
+        sample_dir = (
+            Path(__file__).parent.parent.parent / "tests/helpers/sample_files"
+        )
+        pdf_path = sample_dir / "sample.pdf"
+        jpg_path = sample_dir / "sample.jpg"
+
+        file_list = [
+            {
+                "key": "sample.pdf",
+                "mimetype": "application/pdf",
+                "size": 13264,  # FIXME: Check reporting of mismatch
+            },
+            {
+                "key": "sample.jpg",
+                "mimetype": "image/jpeg",
+                "size": 1174188,
+            },
+        ]
+        file_entries = {f["key"]: f for f in file_list}
+
+        test_metadata = record_metadata_with_files(
+            metadata_in=self.metadata_source,
+            community_list=[community],
+            owner_id=user_id,
+            file_entries=file_entries,
+        )
+        test_metadata.update_metadata(
+            {
+                "metadata|identifiers": [
+                    {"identifier": "hc:33383", "scheme": "import-recid"}
+                ]
+            }
+        )
+        # Create accounts for the owners listed in the input metadata
+        for owner in (
+            test_metadata.metadata_in.get("parent", {})
+            .get("access", {})
+            .get("owned_by", [])
+        ):
+            if owner["email"] != "test@example.com":
+                user_factory(
+                    email=owner["email"],
+                    token=False,
+                    saml_id=None,
+                )
+
+        # Create group communities
+        for g in test_metadata.metadata_in.get("custom_fields", {}).get(
+            "hclegacy:groups_for_deposit", []
+        ):
+            minimal_community_factory(
+                slug=g["group_name"].lower().replace(" ", "-"),
+                custom_fields={
+                    "kcr:commons_group_id": g["group_identifier"],
+                    "kcr:commons_group_name": g["group_name"],
+                },
+            )
+
+        # Keep the upload streams open only for the duration of the load; the
+        # context manager closes them even if the loader raises.
+        with open(pdf_path, "rb") as pdf_stream, open(jpg_path, "rb") as jpg_stream:
+            files = [
+                FileData(
+                    filename=str(pdf_path),
+                    stream=pdf_stream,
+                    content_type="application/pdf",
+                    mimetype="application/pdf",
+                    mimetype_params={},
+                ),
+                FileData(
+                    filename=str(jpg_path),
+                    stream=jpg_stream,
+                    content_type="image/jpeg",
+                    mimetype="image/jpeg",
+                    mimetype_params={},
+                ),
+            ]
+            result: LoaderResult = RecordLoader(
+                user_id=user_id, community_id=community["id"]
+            ).load(
+                index=0,
+                import_data=copy.deepcopy(test_metadata.metadata_in),
+                files=files,
+            )
+
+        record_created_id = result.record_created["record_data"]["id"]
+
+        # add ids and checksums from actual file entries to the expected file entries
+        created_entries = result.record_created["record_data"]["files"]["entries"]
+        for k, f in file_entries.items():
+            f["id"] = created_entries[k]["id"]
+            f["checksum"] = created_entries[k]["checksum"]
+        test_metadata.file_entries = file_entries
+        test_metadata.record_id = record_created_id
+
+        self.check_result_status(result)
+        self.check_result_primary_community(result, community)
+        self.check_result_existing_record(result)
+        self.check_result_record_created(result, test_metadata)
+        self.check_result_uploaded_files(result)
+        self.check_result_community_review_result(result, community, test_metadata)
+        self.check_result_assigned_owners(result, user_id, test_metadata, app)
+        self.check_result_added_to_collections(result)
+        self.check_result_submitted(result, test_metadata, app)
+        self.check_result_errors(result)
+        assert result.log_object
+        assert result.source_id
+
+        # now check the record in the database/search
+        rdm_record = records_service.read(
+            system_identity, id_=record_created_id
+        ).to_dict()
+        assert rdm_record["files"] == {
+            k: v
+            for k, v in test_metadata.published["files"].items()
+            if k != "default_preview"
+        }
+
+        # ensure the files can be downloaded
+        with app.test_client() as client:
+            jpg_bytes = jpg_path.read_bytes()
+            file_response2 = client.get(
+                f"{app.config['SITE_API_URL']}/records/{record_created_id}/files/"
+                "sample.jpg/content"
+            )
+            assert file_response2.status_code == 200
+            assert (
+                "inline" in file_response2.headers["Content-Disposition"]
+            )  # FIXME: why not attachment?
+            assert file_response2.headers["Content-MD5"] == file_md5(
+                file_response2.data
+            )
+            assert file_response2.headers["Content-MD5"] == file_md5(jpg_bytes)
+            assert file_response2.content_type == "image/jpeg"
+            assert file_response2.content_length == 1174188
+            # compare payload lengths directly rather than via sys.getsizeof
+            assert len(file_response2.data) == len(jpg_bytes)
+
+            pdf_bytes = pdf_path.read_bytes()
+            file_response1 = client.get(
+                f"{app.config['SITE_API_URL']}/records/{record_created_id}/files/"
+                "sample.pdf/content"
+            )
+            assert file_response1.status_code == 200
+            assert file_response1.headers["Content-MD5"] == file_md5(
+                file_response1.data
+            )
+            assert file_response1.headers["Content-MD5"] == file_md5(pdf_bytes)
+            assert "sample.pdf" in file_response1.headers["Content-Disposition"]
+            assert (
+                file_response1.content_type == "application/octet-stream"
+            )  # FIXME: why not application/pdf?
+            assert file_response1.content_length == 13264
+            assert len(file_response1.data) == len(pdf_bytes)
+
+
+# class TestImportLoaderLoadWithFilesChapterPDF(BaseImportLoaderWithFilesTest):
+#     @property
+#     def metadata_source(self):
+#         return copy.deepcopy(sample_metadata_chapter_pdf["input"])
+
+
+# class TestImportLoaderLoadWithFilesChapter2PDF(BaseImportLoaderWithFilesTest):
+#     @property
+#     def metadata_source(self):
+#         return copy.deepcopy(sample_metadata_chapter2_pdf["input"])
+
+
+class TestImportLoaderWithFilesJArticle(BaseImportLoaderWithFilesTest):
+    """Run the with-files loader test against the sample journal article."""
+
+    @property
+    def metadata_source(self):
+        """Return a private copy of the sample journal article input metadata."""
+        sample_input = sample_metadata_journal_article_pdf["input"]
+        return copy.deepcopy(sample_input)
+
+
+class BaseImportServiceTest:
+    """Base class for testing record imports with the service."""
+
+    @property
+    def by_api(self):
+        """Flag consulted by ``check_result_status``; False by default.
+
+        When true, the HTTP status code is asserted in addition to the body.
+        NOTE(review): presumably overridden to True by tests that import
+        through the REST API -- confirm against the subclasses.
+        """
+        return False
+
+    @property
+    def community_access_override(self):
+        """Return an empty dict by default.
+
+        NOTE(review): the consumer is not visible in this part of the file;
+        presumably these are access settings applied to the test community --
+        confirm against the test method that reads this property.
+        """
+        return {}
+
+    def make_submitter(self, user_factory, community_id):
+        """Return ``(None, None)`` by default.
+
+        NOTE(review): presumably subclasses override this to create a separate
+        submitting user for the given community and return it as a pair --
+        confirm the meaning of both elements against the caller.
+        """
+        return None, None
+
+    @property
+    def metadata_sources(self):
+        """Return the metadata inputs to import; must be overridden.
+
+        The result checks compare ``len(self.metadata_sources)`` with the
+        number of imported records, so this should hold one item per record.
+
+        Raises:
+            NotImplementedError: always, in this base class.
+        """
+        raise NotImplementedError
+
+    @property
+    def files_to_upload(self):
+        """Override this in subclasses to provide different files to upload.
+
+        The default defined here assumes two input records with two files each.
+
+        Returns a tuple ``(files, file_list, file_streams)``: the FileData
+        objects, the matching file entry dicts (key, mimetype, size) and the
+        open binary streams. The streams are left open here, so closing them
+        falls to whoever reads this property.
+        """
+        sample_dir = (
+            Path(__file__).parent.parent.parent / "tests/helpers/sample_files"
+        )
+        # (file name, mimetype, declared size in bytes)
+        sample_specs = [
+            # FIXME: Check reporting of mismatch
+            ("sample.pdf", "application/pdf", 13264),
+            ("sample.jpg", "image/jpeg", 1174188),
+            # FIXME: Check reporting of mismatch
+            ("sample2.pdf", "application/pdf", 13264),
+            ("sample.csv", "text/csv", 17261),
+        ]
+
+        # Open every stream first, then wrap each one in a FileData object
+        file_streams = [
+            open(sample_dir / file_name, "rb") for file_name, _, _ in sample_specs
+        ]
+        files = [
+            FileData(
+                filename=str(sample_dir / file_name),
+                stream=stream,
+                content_type=mimetype,
+                mimetype=mimetype,
+                mimetype_params={},
+            )
+            for (file_name, mimetype, _), stream in zip(sample_specs, file_streams)
+        ]
+        file_list = [
+            {"key": file_name, "mimetype": mimetype, "size": size}
+            for file_name, mimetype, size in sample_specs
+        ]
+        return files, file_list, file_streams
+
+    @property
+    def expected_errors(self):
+        """Override this in subclasses to provide specific expected errors.
+
+        The expected errors should be a list of lists, where each inner list
+        contains the expected errors for a record. If the record is expected to
+        succeed, the inner list should be empty. The outer list should have the
+        same length as the metadata sources.
+        """
+        # Build a distinct inner list per record: ``[[]] * n`` would repeat
+        # one shared list, so mutating a single entry would change them all.
+        return [[] for _ in self.metadata_sources]
+
+    def check_result_status(
+        self, import_results: dict, status_code: Optional[int]
+    ) -> bool:
+        """Assert the overall status, message and data count of an import.
+
+        The HTTP ``status_code`` is only checked when ``by_api`` is true.
+        Mismatches surface as assertion failures; when every assertion passes
+        the method returns True, which lets the main test short-circuit on
+        responses that carry no body with data or errors (e.g., 403).
+        """
+        failing_items = [e for e in self.expected_errors if e]
+
+        # Happy path: no record is expected to fail.
+        if not failing_items:
+            if self.by_api:
+                assert status_code == 201
+            assert len(import_results["data"]) == len(self.metadata_sources)
+            assert import_results.get("status") == "success"
+            assert (
+                import_results.get("message")
+                == "All records were successfully imported"
+            )
+            return True
+
+        # At least one failure expected: nothing may have been imported.
+        if self.by_api:
+            assert status_code == 400
+        assert len(import_results["data"]) == 0
+        assert import_results.get("status") == "error"
+
+        if len(failing_items) < len(self.metadata_sources):
+            # only some records are expected to fail
+            expected_message = (
+                "Some records could not be imported, and the 'all_or_none' flag "
+                "was set to True, so the import was aborted and no records were "
+                "created. Please check the list of failed records in the "
+                "'errors' field for more information. Each failed item should have "
+                "its own list of specific errors."
+            )
+        else:
+            # all records are expected to fail
+            expected_message = (
+                "No records were successfully imported. Please check the list of "
+                "failed records in the 'errors' field for more information. Each "
+                "failed item should have its own list of specific errors."
+            )
+        assert import_results.get("message") == expected_message
+        return True
+
+    def _check_response_files(self, actual_files, record_files):
+        """Expect every submitted file, keyed by base name, to be "uploaded"."""
+        expected_files = {}
+        for submitted in record_files:
+            base_name = submitted.filename.split("/")[-1]
+            expected_files[base_name] = ["uploaded", []]
+        assert actual_files == expected_files
+
+    def check_result_errors(self, import_results: dict) -> None:
+        """Compare the reported per-item errors with ``expected_errors``.
+
+        When no item is expected to fail, the "errors" list must be empty.
+        Otherwise each reported item must match the expected item index and
+        error list; if it carries metadata, the referenced record must not be
+        among the imported data and must read back with status "deleted".
+        """
+        error_item_indices = [
+            index for index, error in enumerate(self.expected_errors) if error
+        ]
+        if not error_item_indices:
+            assert import_results.get("errors") == []
+            return
+
+        reported_items = import_results["errors"]
+        assert len(reported_items) == len(error_item_indices)
+        for expected_index, reported_item in zip(error_item_indices, reported_items):
+            assert reported_item["item_index"] == expected_index
+            assert reported_item["errors"] == self.expected_errors[expected_index]
+
+            # make sure metadata present when expected
+            if not reported_item["metadata"]:
+                continue
+            failed_recid = reported_item["metadata"]["id"]
+            imported_recids = [r["metadata"]["id"] for r in import_results["data"]]
+            assert failed_recid not in imported_recids
+            leftover_record = records_service.read(
+                system_identity, id_=failed_recid
+            ).to_dict()
+            assert leftover_record["status"] == "deleted"
+
+    def _check_owners_in_community(
+        self,
+        community_members: list[Member],
+        user: User,
+        uploader_id: int,
+    ):
+        """Assert that ``user`` is a community member exactly once, with a fitting role.
+
+        The uploader may be curator, manager or owner; any other user must be
+        a reader.
+        """
+        if user.id != uploader_id:
+            target_roles = ["reader"]
+        else:
+            target_roles = ["curator", "manager", "owner"]
+
+        log = self.app.logger
+        log.debug(
+            f"user.id: {user.id}, type(user.id): {type(user.id)}, uploader_id: {uploader_id}, type(uploader_id): {type(uploader_id)}, target_roles: {target_roles}"
+        )
+        log.debug(
+            f"community_members: {pformat([(m.user_id, type(m.user_id), m.role) for m in community_members])}"
+        )
+
+        matching_ids = [m for m in community_members if m.user_id == user.id]
+        log.debug(
+            f"matching_ids: {pformat([(m.user_id, m.role) for m in matching_ids])}"
+        )
+        assert matching_ids
+        first_match = matching_ids[0]
+        assert first_match.role in target_roles
+        assert len(matching_ids) == 1
+
+    def _check_owners(
+        self,
+        actual_metadata: dict,
+        expected: TestRecordMetadataWithFiles,
+        uploader_id: str,
+        community_id: str,
+    ):
+        """Verify record ownership, access grants and community membership.
+
+        Without owners in the input, the uploader must own the record and no
+        grants may exist. Otherwise the first listed owner must be the record
+        owner and each further owner must match the access grant at the same
+        position; every owner is also checked for community membership.
+        """
+
+        def first_identifier(identifiers, scheme):
+            """Return the first identifier value with ``scheme``, else None."""
+            return next(
+                (i["identifier"] for i in identifiers if i["scheme"] == scheme),
+                None,
+            )
+
+        expected_owners = (
+            expected.metadata_in.get("parent", {}).get("access", {}).get("owned_by")
+        )
+        if not expected_owners:
+            assert actual_metadata["parent"]["access"]["owned_by"] == {
+                "user": uploader_id
+            }
+            assert actual_metadata["parent"]["access"]["grants"] == []
+            return
+
+        community_members = Member.get_members(community_id)
+        self.app.logger.debug(f"community_members: {pformat(community_members)}")
+        self.app.logger.debug(
+            f"community_members: {pformat([(m.user_id, m.role) for m in community_members])}"
+        )
+
+        # The first listed owner becomes the record owner
+        first_expected_owner = expected_owners[0]
+        first_actual_owner = current_accounts.datastore.get_user_by_id(
+            actual_metadata["parent"]["access"]["owned_by"]["user"]
+        )
+        assert first_actual_owner.email == first_expected_owner["email"]
+        self._check_owners_in_community(
+            community_members, first_actual_owner, uploader_id
+        )
+        if len(expected_owners) <= 1:
+            return
+
+        # Any further owners are matched against the access grants, in order
+        other_expected_owners = expected_owners[1:]
+        other_actual_owners = actual_metadata["parent"]["access"]["grants"]
+        for oe, oa in zip(other_expected_owners, other_actual_owners):
+            user = current_accounts.datastore.get_user_by_email(oe["email"])
+            assert oa["subject"]["id"] == str(user.id)
+            assert user.email == oe["email"]
+
+            if oe.get("identifiers"):
+                kc_username = first_identifier(oe["identifiers"], "kc_username")
+                orcid = first_identifier(oe["identifiers"], "orcid")
+                neh_id = first_identifier(oe["identifiers"], "neh_user_id")
+                import_id = first_identifier(oe["identifiers"], "import_user_id")
+
+                if kc_username:
+                    assert user.username in [
+                        kc_username,
+                        f"knowledgeCommons-{kc_username}",
+                    ]
+                if orcid:
+                    assert user.user_profile["identifier_orcid"] == orcid
+                # both of these ids are looked up in the "identifier_other" JSON
+                for other_id in (neh_id, import_id):
+                    if other_id:
+                        other_user_ids = json.loads(
+                            user.user_profile["identifier_other"]
+                        )
+                        assert other_id in other_user_ids.values()
+
+            # make sure they were added to the community
+            # as reader (unless they are the uploader)
+            self._check_owners_in_community(community_members, user, uploader_id)
+
+    def _check_successful_import(
+        self,
+        actual: dict,
+        record_files: list,
+        expected: TestRecordMetadataWithFiles,
+        community: dict,
+        uploader_id: str,
+    ):
+        """Check one successfully imported item against its expected metadata.
+
+        Covers the source id, record id and URL, collection id, error list,
+        uploaded files, published metadata, owners, and finally the record as
+        read back through the records service.
+        """
+        assert self.app
+        actual_metadata = actual.get("metadata")
+        assert actual_metadata
+
+        # The reported source id must equal the "import-recid" identifier
+        actual_import_id = actual.get("source_id")
+        record_identifiers = actual_metadata.get("metadata", {}).get("identifiers")
+        import_recid = next(
+            i.get("identifier")
+            for i in record_identifiers
+            if i.get("scheme") == "import-recid"
+        )
+        assert actual_import_id == import_recid
+
+        actual_record_id = actual_metadata.get("id")
+        assert actual_record_id == actual.get("record_id")
+
+        expected_record_url = (
+            f"{self.app.config['SITE_UI_URL']}/records/{actual_record_id}"
+        )
+        assert actual.get("record_url") == expected_record_url
+
+        # The collection may be reported by id or slug and must be the
+        # record's first community entry
+        actual_collection_id = actual.get("collection_id")
+        assert actual_collection_id in [community["id"], community["slug"]]
+        community_entries = (
+            actual_metadata.get("parent", {}).get("communities", {}).get("entries", [])
+        )
+        assert actual_collection_id == community_entries[0].get("id")
+
+        assert actual.get("errors") == []
+
+        # comparing file list separately from file entries in metadata
+        self._check_response_files(actual.get("files"), record_files)
+
+        # add ids and checksums from actual file entries to the expected
+        # file entries to compare file entries in metadata
+        for key, expected_entry in expected.file_entries.items():
+            actual_entry = actual_metadata["files"]["entries"][key]
+            expected_entry["id"] = actual_entry["id"]
+            expected_entry["checksum"] = actual_entry["checksum"]
+        assert expected.compare_published(actual_metadata)
+
+        self._check_owners(actual_metadata, expected, uploader_id, community["id"])
+
+        # Check the record in the database
+        rdm_record = records_service.read(
+            system_identity, id_=actual_record_id
+        ).to_dict()
+        assert expected.compare_published(rdm_record)
+
+    def _check_failed_import(
+        self, import_result: dict, expected_error_list: list[dict]
+    ):
+        """Expect an "error" status and exactly the given list of errors."""
+        reported_status = import_result["status"]
+        reported_errors = import_result.get("errors")
+        assert reported_status == "error"
+        assert reported_errors == expected_error_list
+
+    def check_result_data(
+        self,
+        import_results: dict,
+        files: list,
+        metadata_sources: list,
+        community: dict,
+        uploader_id: str,
+    ) -> None:
+        """Check the "data" items of the import results.
+
+        If any record is expected to fail, "data" must be empty and nothing
+        else is checked. Otherwise every item is compared with its metadata
+        source and with its equally sized slice of ``files``.
+        """
+        assert self.app
+        expected_error_count = sum(1 for e in self.expected_errors if e)
+        imported_items = import_results["data"]
+        if expected_error_count > 0:
+            assert len(imported_items) == 0
+            return
+
+        assert len(imported_items) == len(metadata_sources) - expected_error_count
+        # files are assumed to be spread evenly over the records, in order
+        files_per_item = len(files) // len(metadata_sources)
+        for idx, actual_record_result in enumerate(imported_items):
+            expected_error_list = self.expected_errors[idx]
+            assert actual_record_result["item_index"] == idx
+
+            # NOTE(review): unreachable at present -- the early return above
+            # fires whenever any expected error list is non-empty.
+            if expected_error_list:
+                self._check_failed_import(actual_record_result, expected_error_list)
+                continue
+
+            slice_start = idx * files_per_item
+            record_files = files[slice_start : slice_start + files_per_item]  # noqa: E203
+            self._check_successful_import(
+                actual_record_result,
+                record_files,
+                metadata_sources[idx],
+                community,
+                uploader_id,
+            )
+
+    def _do_api_import(
+        self,
+        community: dict,
+        file_streams: list,
+        token: str,
+        metadata_source_objects: list[TestRecordMetadataWithFiles],
+    ) -> tuple[Optional[dict], int]:
+        assert self.app  # set by the test method before this helper runs
+        with self.app.test_client() as client:
+            actual_response = client.post(
+                f"{self.app.config['SITE_API_URL']}/import/{community['slug']}",
                 content_type="multipart/form-data",
                 data={
-                    "metadata": json.dumps(minimal_record_metadata),
+                    "metadata": json.dumps(
+                        [copy.deepcopy(m.metadata_in) for m in metadata_source_objects]
+                    ),
+                    "id_scheme": "import-recid",  # scheme of the ids the test assigns
                     "review_required": "true",
                     "strict_validation": "true",
                     "all_or_none": "true",
-                    "files": [
-                        (
-                            file_path,
-                            "sample.pdf",
-                            "application/pdf",
-                        )
-                    ],
+                    "files": file_streams,
                 },
                 headers={
                     "Content-Type": "multipart/form-data",
                     "Authorization": f"Bearer {token}",
                 },
             )
-        print(response.text)
-        assert response.status_code == 201
-        assert response.json == {"status": "success", "data": []}
 
+        return actual_response.json, actual_response.status_code
 
-# import requests
+    def test_import_records_service_load(
+        self,
+        running_app,
+        db,
+        minimal_community_factory,
+        user_factory,
+        search_clear,
+        mock_send_remote_api_update_fixture,
+    ):
+        self.app = running_app.app
+        u = user_factory(email="test@example.com", token=True, saml_id=None)
+        user_id = u.user.id
+        identity = get_identity(u.user)
+        identity.provides.add(authenticated_user)
 
-# url = "https://works.hcommons.org/api/import"
+        # FIXME: We need to actually create a KC account for the users
+        # assigned as owners, not just a KCWorks account. Or maybe send
+        # them an email with a link to create a KC account with the same
+        # email address?
 
-# payload = {'collection': 'mlacommons'}
-# files=[
-#   ('file1',('Test.pdf',open('/Users/ianscott/Downloads/Test.pdf','rb'),'application/pdf'))
-# ]
-# headers = {
-#   'Cookie': 'SimpleSAMLCommons=41b2316ef1cefa7c21fa257f50b95b1b'
-# }
+        community_record = minimal_community_factory(
+            owner=u.user.id,
+            access=self.community_access_override,
+        )
+        community = community_record.to_dict()
 
-# response = requests.request("POST", url, headers=headers, data=payload, files=files)
+        submitter_identity, submitter_token = self.make_submitter(
+            user_factory, community["id"]
+        )
+        if not submitter_identity:
+            submitter_identity, submitter_token = identity, u.allowed_token
 
-# print(response.text)
+        if not self.by_api:
+            login_user(submitter_identity.user)
+
+        # NOTE(review): streams are closed below only if the import doesn't raise
+        files, file_list, file_streams = self.files_to_upload
+        files_per_item = len(file_list) // len(self.metadata_sources)
+
+        metadata_source_objects = []
+        for idx, metadata_source in enumerate(self.metadata_sources):
+            item_files = file_list[
+                idx * files_per_item : (idx + 1) * files_per_item  # noqa: E203
+            ]
+            file_entries = {f["key"]: f for f in item_files}
+            test_metadata = TestRecordMetadataWithFiles(
+                metadata_in=metadata_source,
+                community_list=[community],
+                owner_id=u.user.id,
+                file_entries=file_entries,
+            )
+
+            test_metadata.update_metadata(
+                {
+                    "metadata|identifiers": [
+                        {
+                            "identifier": f"1234567890{str(idx)}",
+                            "scheme": "import-recid",
+                        }
+                    ]
+                }
+            )
+            metadata_source_objects.append(test_metadata)
+
+        if self.by_api and submitter_token:
+            import_results, status_code = self._do_api_import(
+                community,
+                file_streams,
+                submitter_token,
+                metadata_source_objects,
+            )
+        else:
+            load_community_needs(identity)  # NOTE(review): not submitter_identity?
+            service = current_record_importer_service
+            import_results = service.import_records(
+                identity=submitter_identity,
+                file_data=files,
+                metadata=[
+                    copy.deepcopy(m.metadata_in) for m in metadata_source_objects
+                ],
+                community_id=community["id"],
+            )
+            status_code = None  # no HTTP status on the service path
+
+        for file in file_streams:
+            file.close()
+
+        assert import_results is not None
+        if self.check_result_status(import_results, status_code):
+            self.check_result_errors(import_results)
+            self.check_result_data(
+                import_results,
+                files,
+                metadata_source_objects,
+                community,
+                user_id,
+            )
+
+
+# class TestImportServiceChapter(BaseImportRecordsServiceLoadTest):
+#     @property
+#     def metadata_source(self):
+#         return sample_metadata_chapter_pdf["input"]
+
+
+# class TestImportServiceChapter2(BaseImportRecordsServiceLoadTest):
+#     @property
+#     def metadata_source(self):
+#         return sample_metadata_chapter2_pdf["input"]
+
+
+class TestImportServiceJArticleSuccess(BaseImportServiceTest):
+    @property
+    def metadata_sources(self):
+        return [  # deep copies, independent of the sample dicts
+            copy.deepcopy(sample_metadata_journal_article_pdf["input"]),
+            copy.deepcopy(sample_metadata_journal_article2_pdf["input"]),
+        ]
+
+
+class TestImportServiceJArticleErrorTitle(BaseImportServiceTest):
+    @property
+    def metadata_sources(self):
+        meta1 = copy.deepcopy(sample_metadata_chapter_pdf["input"])  # chapter sample
+        meta1["metadata"]["title"] = ""  # blank title for the first record only
+        meta2 = copy.deepcopy(sample_metadata_chapter2_pdf["input"])
+        return [meta1, meta2]
+
+    @property
+    def expected_errors(self):
+        return [
+            [
+                {
+                    "validation_error": {
+                        "metadata": {"title": ["Missing data for required field."]}
+                    }
+                }
+            ],
+            [],  # no errors expected for the second record
+        ]
+
+
+class TestImportServiceJArticleErrorMissingFile(BaseImportServiceTest):
+    @property
+    def metadata_sources(self):
+        meta1 = copy.deepcopy(sample_metadata_chapter_pdf["input"])
+        meta1["metadata"]["title"] = ""
+        meta2 = copy.deepcopy(sample_metadata_chapter2_pdf["input"])
+        return [meta1, meta2]
+
+    @property
+    def files_to_upload(self):
+        """Override the default files to upload to remove the first file.
+
+        NOTE(review): metadata_sources blanks the first record's title, so the
+        record that loses its file also has the invalid metadata; confirm intent.
+        """
+        files, file_list, file_streams = super().files_to_upload
+        file_streams[0].close()
+        file_streams = file_streams[1:]
+        files = files[1:]
+        # leave the file list the same so that there's a mismatch between
+        # the files and the file list (entries)
+        return files, file_list, file_streams
+
+    @property
+    def expected_errors(self):
+        """
+        Only the first record's missing-file upload failure is listed.
+        NOTE(review): there is no entry for the second record; confirm intent.
+        """
+        return [
+            [
+                {
+                    "file upload failures": {
+                        "sample.pdf": [
+                            "failed",
+                            ["File sample.pdf not found in list of files."],
+                        ]
+                    },
+                },
+            ],
+        ]
+
+
+class TestImportAPIJournalArticle(BaseImportServiceTest):
+    """Test importing two journal articles via the API with no errors."""
+
+    @property
+    def by_api(self):
+        return True  # makes the test POST to the import endpoint
+
+    @property
+    def metadata_sources(self):
+        return [  # deep copies, independent of the sample dicts
+            copy.deepcopy(sample_metadata_journal_article_pdf["input"]),
+            copy.deepcopy(sample_metadata_journal_article2_pdf["input"]),
+        ]
+
+
+class BaseInsufficientPermissionsTest(TestImportAPIJournalArticle):
+    """Base class for tests that check the API with insufficient permissions."""
+
+    def check_result_status(self, import_results: dict, status_code: Optional[int]):
+        if self.by_api:
+            assert status_code == 403
+        assert import_results.get("message") == (
+            "The user does not have the necessary permissions to "
+            "import records via this endpoint."
+        )  # asserted whether or not by_api is set
+        return False  # to stop the test execution from looking for data/errors
+
+
+class TestImportAPIInsufficientPermissionsReader(BaseInsufficientPermissionsTest):
+    """Test importing records via the API with insufficient permissions.
+
+    The community allows direct publishing by curators and managers
+    (review policy "open"), but uploader is only a reader.
+    """
+
+    @property
+    def community_access_override(self):
+        return {"review_policy": "open", "record_policy": "open"}
+
+    def make_submitter(self, user_factory, community_id):
+        """Try using API with a user that is just a "reader" in the community."""
+        new_user = user_factory(email="another@example.com", token=True, saml_id=None)
+        make_community_member(new_user.user.id, "reader", community_id)
+        return new_user.user.id, new_user.allowed_token  # user id, not an Identity
+
+
+class TestImportAPIInsufficientPermissionsCurator(BaseInsufficientPermissionsTest):
+    """Test importing records via the API with insufficient permissions.
+
+    The community does not allow direct publishing (review policy "closed"),
+    and uploader is only a manager.
+    """
+
+    @property
+    def community_access_override(self):
+        return {"review_policy": "closed", "record_policy": "closed"}
+
+    def make_submitter(self, user_factory, community_id):
+        """Try using API with a user that is just a "manager" in the community."""
+        new_user = user_factory(email="another@example.com", token=True, saml_id=None)
+        make_community_member(new_user.user.id, "manager", community_id)
+        return new_user.user.id, new_user.allowed_token  # user id, not an Identity
+
+
+class TestImportAPIInsufficientPermissionsOwner(BaseInsufficientPermissionsTest):
+    """Test importing records via the API with insufficient permissions.
+
+    The community allows direct publishing (review policy "open"),
+    but uploader is not a community member.
+    """
+
+    @property
+    def community_access_override(self):
+        return {"review_policy": "open", "record_policy": "open"}
+
+    def make_submitter(self, user_factory, community_id):
+        """Try using API with a user that is not a community member."""
+        new_user = user_factory(email="another@example.com", token=True, saml_id=None)
+        return new_user.user.id, new_user.allowed_token  # user id, not an Identity
+
+
+class TestImportAPIJournalArticleErrorTitle(TestImportServiceJArticleErrorTitle):
+    @property
+    def by_api(self):
+        return True  # rerun the inherited scenario through the API endpoint
+
+
+class TestImportAPIJournalArticleErrorMissingFile(
+    TestImportServiceJArticleErrorMissingFile
+):
+    @property
+    def by_api(self):
+        return True  # rerun the inherited scenario through the API endpoint
diff --git a/site/tests/api/test_api_notifications.py b/site/tests/api/test_api_notifications.py
index 56d352d25..6fa747e53 100644
--- a/site/tests/api/test_api_notifications.py
+++ b/site/tests/api/test_api_notifications.py
@@ -25,6 +25,7 @@
 from kcworks.proxies import current_internal_notifications
 from pprint import pformat
 import time
+from ..fixtures.records import TestRecordMetadata
 
 
 def test_notify_for_request_acceptance(
@@ -32,7 +33,6 @@ def test_notify_for_request_acceptance(
     db,
     user_factory,
     minimal_community_factory,
-    minimal_record_metadata,
     client,
     client_with_login,
     headers,
@@ -77,12 +77,14 @@ def test_notify_for_request_acceptance(
     # assert user.user_profile.get("full_name") == "Test User"
     assert user.user_profile.get("unread_notifications", "null") == "null"
 
+    metadata = TestRecordMetadata(app=app)
+
     with app.test_client() as client:
         # logged_in_client, _ = client_with_login(client, user)
         logged_in_client = client
         response = logged_in_client.post(
             f"{app.config['SITE_API_URL']}/records",
-            data=json.dumps(minimal_record_metadata),
+            data=json.dumps(metadata.metadata_in),
             headers={**headers, "Authorization": f"Bearer {token}"},
         )
         assert response.status_code == 201
@@ -244,7 +246,6 @@ def test_notify_for_request_decline(
     db,
     user_factory,
     minimal_community_factory,
-    minimal_record_metadata,
     client,
     client_with_login,
     headers,
@@ -289,12 +290,14 @@ def test_notify_for_request_decline(
     # assert user.user_profile.get("full_name") == "Test User"
     assert user.user_profile.get("unread_notifications", "null") == "null"
 
+    metadata = TestRecordMetadata(app=app)
+
     with app.test_client() as client:
         # logged_in_client, _ = client_with_login(client, user)
         logged_in_client = client
         response = logged_in_client.post(
             f"{app.config['SITE_API_URL']}/records",
-            data=json.dumps(minimal_record_metadata),
+            data=json.dumps(metadata.metadata_in),
             headers={**headers, "Authorization": f"Bearer {token}"},
         )
         assert response.status_code == 201
@@ -455,7 +458,6 @@ def test_notify_for_request_cancellation(
     db,
     user_factory,
     minimal_community_factory,
-    minimal_record_metadata,
     client,
     client_with_login,
     headers,
@@ -489,12 +491,14 @@ def test_notify_for_request_cancellation(
     # assert user.user_profile.get("full_name") == "Test User"
     assert user.user_profile.get("unread_notifications", "null") == "null"
 
+    metadata = TestRecordMetadata(app=app)
+
     with app.test_client() as client:
         # logged_in_client, _ = client_with_login(client, user)
         logged_in_client = client
         response = logged_in_client.post(
             f"{app.config['SITE_API_URL']}/records",
-            data=json.dumps(minimal_record_metadata),
+            data=json.dumps(metadata.metadata_in),
             headers={**headers, "Authorization": f"Bearer {token}"},
         )
         assert response.status_code == 201
@@ -634,7 +638,6 @@ def test_notify_for_new_request_comment(
     db,
     user_factory,
     minimal_community_factory,
-    minimal_record_metadata,
     client,
     client_with_login,
     headers,
@@ -667,10 +670,12 @@ def test_notify_for_new_request_comment(
     token = u.allowed_token
     assert user.user_profile.get("unread_notifications", "null") == "null"
 
+    metadata = TestRecordMetadata(app=app)
+
     with app.test_client() as client:
         response = client.post(
             f"{app.config['SITE_API_URL']}/records",
-            data=json.dumps(minimal_record_metadata),
+            data=json.dumps(metadata.metadata_in),
             headers={**headers, "Authorization": f"Bearer {token}"},
         )
         assert response.status_code == 201
@@ -834,7 +839,6 @@ def test_read_unread_notifications_by_service(
     db,
     user_factory,
     minimal_community_factory,
-    minimal_record_metadata,
     client,
     client_with_login,
     headers,
@@ -912,7 +916,6 @@ def test_clear_unread_notifications_by_service(
     db,
     user_factory,
     minimal_community_factory,
-    minimal_record_metadata,
     client,
     client_with_login,
     headers,
@@ -1127,7 +1130,6 @@ def test_clear_unread_notifications_by_view(
     db,
     user_factory,
     minimal_community_factory,
-    minimal_record_metadata,
     client,
     client_with_login,
     headers,
@@ -1235,7 +1237,6 @@ def test_clear_one_unread_notification_by_view(
     db,
     user_factory,
     minimal_community_factory,
-    minimal_record_metadata,
     client,
     client_with_login,
     headers,
@@ -1398,7 +1399,6 @@ def test_unread_endpoint_bad_methods(
 def test_notification_on_first_upload(
     running_app,
     user_factory,
-    minimal_record_metadata,
     db,
     search_clear,
     client,
@@ -1456,10 +1456,12 @@ def test_notification_on_first_upload(
     login_user(user)
     login_user_via_session(client, email=user.email)
 
+    metadata = TestRecordMetadata(app=app)
+
     # Create the first draft
     draft1_response = client.post(
         f"{app.config['SITE_API_URL']}/records",
-        data=json.dumps(minimal_record_metadata),
+        data=json.dumps(metadata.metadata_in),
         headers={**headers, "Authorization": f"Bearer {token}"},
     )
     assert draft1_response.status_code == 201
@@ -1484,9 +1486,8 @@ def test_notification_on_first_upload(
         f"'{app.config.get('SITE_UI_URL')}/records/{first_draft_id}'>"
         f"View draft</a>)" in email.html
     )
-    assert f"Draft title: {minimal_record_metadata['metadata']['title']}" in email.body
-    assert f"Draft title: {minimal_record_metadata['metadata']['title']}" in email.html
-    # assert f"Full metadata: {minimal_record_metadata}" in email.body
+    assert f"Draft title: {metadata.draft['metadata']['title']}" in email.body
+    assert f"Draft title: {metadata.draft['metadata']['title']}" in email.html
     assert f"User ID: {user_id}" in email.body
     assert f"User ID: {user_id}" in email.html
     assert f"User email: {user_email}" in email.body
@@ -1499,7 +1500,7 @@ def test_notification_on_first_upload(
     # Create a second draft work (different work)
     draft2_response = client.post(
         f"{app.config['SITE_API_URL']}/records",
-        data=json.dumps(minimal_record_metadata),
+        data=json.dumps(metadata.metadata_in),
         headers={**headers, "Authorization": f"Bearer {token}"},
     )
     assert draft2_response.status_code == 201
@@ -1527,8 +1528,8 @@ def test_notification_on_first_upload(
     app.logger.debug(f"email.body: {pformat(email.body)}")
     assert f"Work ID: {first_draft_id}" in email.body
     assert f"Work ID: {first_draft_id}" in email.html
-    assert f"Work title: {minimal_record_metadata['metadata']['title']}" in email.body
-    assert f"Work title: {minimal_record_metadata['metadata']['title']}" in email.html
+    assert f"Work title: {metadata.draft['metadata']['title']}" in email.body
+    assert f"Work title: {metadata.draft['metadata']['title']}" in email.html
     assert f"User ID: {user_id}" in email.body
     assert f"User ID: {user_id}" in email.html
     assert f"User email: {user_email}" in email.body
diff --git a/site/tests/api/test_api_record_ops.py b/site/tests/api/test_api_record_ops.py
index 3fccb8117..3179c28c5 100644
--- a/site/tests/api/test_api_record_ops.py
+++ b/site/tests/api/test_api_record_ops.py
@@ -1,25 +1,26 @@
 import pytest
 import arrow
 from datetime import timedelta
-import hashlib
+
+# import hashlib
 from invenio_access.permissions import authenticated_user, system_identity
 from invenio_access.utils import get_identity
-from invenio_files_rest.helpers import compute_checksum
+
+# from invenio_files_rest.helpers import compute_checksum
 from invenio_rdm_records.proxies import current_rdm_records_service as records_service
 import json
 from pathlib import Path
 from pprint import pformat
 import re
 from ..fixtures.users import user_data_set
+from ..fixtures.records import TestRecordMetadata, TestRecordMetadataWithFiles
 
 
 def test_draft_creation_api(
     running_app,
     db,
-    build_draft_record_links,
     user_factory,
     client_with_login,
-    minimal_record_metadata,
     headers,
     search_clear,
     celery_worker,
@@ -34,12 +35,13 @@ def test_draft_creation_api(
     user = u.user
     token = u.allowed_token
 
-    minimal_record_metadata.update({"files": {"enabled": False}})
+    metadata = TestRecordMetadata(app=app)
+
     with app.test_client() as client:
         logged_in_client = client_with_login(client, user)
         response = logged_in_client.post(
             f"{app.config['SITE_API_URL']}/records",
-            data=json.dumps(minimal_record_metadata),
+            data=json.dumps(metadata.metadata_in),
             headers={**headers, "Authorization": f"Bearer {token}"},
         )
         assert response.status_code == 201
@@ -64,7 +66,7 @@ def test_draft_creation_api(
             == actual_draft["updated"]
         )
 
-        assert actual_draft["links"] == build_draft_record_links(
+        assert actual_draft["links"] == TestRecordMetadata.build_draft_record_links(
             actual_draft_id, app.config["SITE_API_URL"], app.config["SITE_UI_URL"]
         )
 
@@ -141,7 +143,7 @@ def test_draft_creation_api(
             "entries": {},
         }
         assert actual_draft["status"] == "draft"
-        publication_date = arrow.get(actual_draft["metadata"]["publication_date"])
+        # publication_date = arrow.get(actual_draft["metadata"]["publication_date"])
 
         # TODO: UI field only present in object sent to jinja template
         # we need to test that the jinja template is working correctly
@@ -200,7 +202,6 @@ def test_draft_creation_service(
     running_app,
     db,
     client_with_login,
-    minimal_record_metadata,
     headers,
     user_factory,
     search_clear,
@@ -208,7 +209,8 @@ def test_draft_creation_service(
     minimal_draft_record_factory,
 ):
     app = running_app.app
-    result = minimal_draft_record_factory(metadata=minimal_record_metadata)
+    metadata = TestRecordMetadata(app=app)
+    result = minimal_draft_record_factory(metadata=metadata.metadata_in)
     actual_draft = result.to_dict()
     app.logger.debug(f"actual_draft: {pformat(actual_draft)}")
     assert actual_draft["is_draft"]
@@ -217,28 +219,23 @@ def test_draft_creation_service(
     assert actual_draft["versions"]["is_latest_draft"] is True
     assert actual_draft["versions"]["index"] == 1
     assert actual_draft["status"] == "draft"
-    assert actual_draft["files"]["enabled"] == False
+    assert actual_draft["files"]["enabled"] is False
     assert actual_draft["files"]["entries"] == {}
     assert (
-        actual_draft["metadata"]["creators"]
-        == minimal_record_metadata["metadata"]["creators"]
+        actual_draft["metadata"]["creators"] == metadata.draft["metadata"]["creators"]
     )
     assert (
-        actual_draft["metadata"]["publisher"]
-        == minimal_record_metadata["metadata"]["publisher"]
+        actual_draft["metadata"]["publisher"] == metadata.draft["metadata"]["publisher"]
     )
     assert (
         actual_draft["metadata"]["publication_date"]
-        == minimal_record_metadata["metadata"]["publication_date"]
+        == metadata.draft["metadata"]["publication_date"]
     )
     assert (
         actual_draft["metadata"]["resource_type"]["id"]
-        == minimal_record_metadata["metadata"]["resource_type"]["id"]
-    )
-    assert (
-        actual_draft["metadata"]["title"]
-        == minimal_record_metadata["metadata"]["title"]
+        == metadata.draft["metadata"]["resource_type"]["id"]
     )
+    assert actual_draft["metadata"]["title"] == metadata.draft["metadata"]["title"]
 
     read_result = records_service.read_draft(system_identity, actual_draft["id"])
     actual_read = read_result.to_dict()
@@ -251,7 +248,6 @@ def test_record_publication_api(
     running_app,
     db,
     client_with_login,
-    minimal_record_metadata,
     headers,
     user_factory,
     search_clear,
@@ -259,6 +255,7 @@ def test_record_publication_api(
     mock_send_remote_api_update_fixture,
 ):
     app = running_app.app
+    metadata = TestRecordMetadata(app=app)
     u = user_factory(
         email=user_data_set["user1"]["email"],
         password="test",
@@ -272,10 +269,9 @@ def test_record_publication_api(
 
     with app.test_client() as client:
         logged_in_client = client_with_login(client, user)
-        minimal_record_metadata.update({"files": {"enabled": False}})
         response = logged_in_client.post(
             f"{app.config['SITE_API_URL']}/records",
-            data=json.dumps(minimal_record_metadata),
+            data=json.dumps(metadata.metadata_in),
             headers={**headers, "Authorization": f"Bearer {token}"},
         )
         assert response.status_code == 201
@@ -304,7 +300,6 @@ def test_record_publication_service(
     running_app,
     db,
     client_with_login,
-    minimal_record_metadata,
     headers,
     user_factory,
     search_clear,
@@ -313,9 +308,9 @@ def test_record_publication_service(
     minimal_draft_record_factory,
 ):
     """Test that a system user can create a draft record internally."""
-
-    minimal_record_metadata.update({"files": {"enabled": False}})
-    result = minimal_draft_record_factory(metadata=minimal_record_metadata)
+    app = running_app.app
+    metadata = TestRecordMetadata(app=app)
+    result = minimal_draft_record_factory(metadata=metadata.metadata_in)
     actual_draft = result.to_dict()
     actual_draft_id = actual_draft["id"]
 
@@ -345,13 +340,13 @@ def test_record_draft_update_api(
     running_app,
     db,
     client_with_login,
-    minimal_record_metadata,
     headers,
     user_factory,
     search_clear,
     mock_send_remote_api_update_fixture,
 ):
     app = running_app.app
+    metadata = TestRecordMetadata(app=app)
 
     u = user_factory(
         email=user_data_set["user1"]["email"],
@@ -360,12 +355,11 @@ def test_record_draft_update_api(
     user = u.user
     token = u.allowed_token
 
-    minimal_record_metadata.update({"files": {"enabled": False}})
     with app.test_client() as client:
         logged_in_client = client_with_login(client, user)
         creation_response = logged_in_client.post(
             f"{app.config['SITE_API_URL']}/records",
-            data=json.dumps(minimal_record_metadata),
+            data=json.dumps(metadata.metadata_in),
             headers={**headers, "Authorization": f"Bearer {token}"},
         )
         assert creation_response.status_code == 201
@@ -373,10 +367,11 @@ def test_record_draft_update_api(
         actual_draft = creation_response.json
         actual_draft_id = actual_draft["id"]
 
-        minimal_record_metadata["metadata"]["title"] = "A Romans Story 2"
+        metadata.update_metadata({"metadata|title": "A Romans Story 2"})
+        app.logger.debug(f"metadata.metadata_in: {pformat(metadata.metadata_in)}")
         update_response = logged_in_client.put(
             f"{app.config['SITE_API_URL']}/records/{actual_draft_id}/draft",
-            data=json.dumps(minimal_record_metadata),
+            data=json.dumps(metadata.metadata_in),
             headers={**headers, "Authorization": f"Bearer {token}"},
         )
         assert update_response.status_code == 200
@@ -410,7 +405,6 @@ def test_record_draft_update_service(
     running_app,
     db,
     client_with_login,
-    minimal_record_metadata,
     minimal_draft_record_factory,
     headers,
     user_factory,
@@ -418,11 +412,12 @@ def test_record_draft_update_service(
     celery_worker,
     mock_send_remote_api_update_fixture,
 ):
-    minimal_record_metadata.update({"files": {"enabled": False}})
-    draft_result = minimal_draft_record_factory(metadata=minimal_record_metadata)
-    minimal_record_metadata["metadata"]["title"] = "A Romans Story 2"
+    app = running_app.app
+    metadata = TestRecordMetadata(app=app)
+    draft_result = minimal_draft_record_factory(metadata=metadata.metadata_in)
+    metadata.update_metadata({"metadata|title": "A Romans Story 2"})
     edited_draft_result = records_service.update_draft(
-        system_identity, draft_result.id, minimal_record_metadata
+        system_identity, draft_result.id, metadata.metadata_in
     )
     actual_edited = edited_draft_result.to_dict()
     assert actual_edited["id"] == draft_result.id
@@ -441,7 +436,6 @@ def test_record_published_update(
     running_app,
     db,
     client_with_login,
-    minimal_record_metadata,
     headers,
     user_factory,
     search_clear,
@@ -455,7 +449,6 @@ def test_record_versioning(
     running_app,
     db,
     client_with_login,
-    minimal_record_metadata,
     headers,
     user_factory,
     search_clear,
@@ -468,7 +461,6 @@ def test_record_file_upload_api_not_enabled(
     running_app,
     db,
     client_with_login,
-    minimal_record_metadata,
     headers,
     user_factory,
     search_clear,
@@ -490,10 +482,12 @@ def test_record_file_upload_api_not_enabled(
 
     file_list = [{"key": "sample.pdf"}]
 
+    metadata = TestRecordMetadata(app=app)
+
     with app.test_client() as client:
-        minimal_record_metadata["files"] = {"enabled": False}
+        metadata.update_metadata({"files|enabled": False})
         draft_result = minimal_draft_record_factory(
-            identity=identity, metadata=minimal_record_metadata
+            identity=identity, metadata=metadata.metadata_in
         )
         draft_id = draft_result.id
 
@@ -512,7 +506,6 @@ def test_record_file_upload_api(
     running_app,
     db,
     client_with_login,
-    minimal_record_metadata,
     headers,
     user_factory,
     search_clear,
@@ -545,10 +538,14 @@ def test_record_file_upload_api(
     )
     file_list = [{"key": "sample.pdf"}]
 
+    metadata = TestRecordMetadataWithFiles(
+        app=app,
+        file_entries={f["key"]: f for f in file_list},
+    )
+
     with app.test_client() as client:
-        minimal_record_metadata["files"] = {"enabled": True}
         draft_result = minimal_draft_record_factory(
-            identity=identity, metadata=minimal_record_metadata
+            identity=identity, metadata=metadata.metadata_in
         )
         draft_id = draft_result.id
 
@@ -562,7 +559,7 @@ def test_record_file_upload_api(
             data=json.dumps(file_list),
             headers={**headers, "Authorization": f"Bearer {token}"},
         )
-        csrf_cookie = response.headers.get("Set-Cookie")
+        # csrf_cookie = response.headers.get("Set-Cookie")
         print("headers")
         print(headers)
         print("response headers")
@@ -681,7 +678,7 @@ def test_record_file_upload_api(
 
             # calculate the md5 checksum
             binary_file_data.seek(0)
-            md5_checksum = compute_checksum(binary_file_data, "md5", hashlib.md5())
+            # md5_checksum = compute_checksum(binary_file_data, "md5", hashlib.md5())
 
         # finalize the file upload
         headers.update({"content-type": "application/json"})
@@ -723,7 +720,8 @@ def test_record_file_upload_api(
                 "sample.pdf/content"
             ),
             "self": (
-                f"{app.config['SITE_API_URL']}/records/{draft_id}/draft/files/sample.pdf"
+                f"{app.config['SITE_API_URL']}/records/{draft_id}/draft/"
+                "files/sample.pdf"
             ),
             "commit": (
                 f"{app.config['SITE_API_URL']}/records/{draft_id}/draft/files/"
@@ -782,11 +780,9 @@ def test_record_file_upload_api(
 def test_record_view_api(
     running_app,
     db,
-    minimal_record_metadata,
     minimal_published_record_factory,
     search_clear,
     celery_worker,
-    build_published_record_links,
     mock_send_remote_api_update_fixture,
 ):
     """
@@ -796,113 +792,25 @@ def test_record_view_api(
     records API endpoint.
     """
     app = running_app.app
-    record = minimal_published_record_factory()
+    metadata = TestRecordMetadata(app=app, owner_id=None)
+    record = minimal_published_record_factory(metadata=metadata.metadata_in)
 
     with app.test_client() as client:
         record_response = client.get(f"/api/records/{record.id}")
         record = record_response.json
-        assert arrow.utcnow() - arrow.get(record["created"]) < timedelta(seconds=2)
-        assert arrow.utcnow() - arrow.get(record["updated"]) < timedelta(seconds=2)
-        assert record["access"] == {
-            "embargo": {"active": False, "reason": None},
-            "files": "public",
-            "record": "public",
-            "status": "metadata-only",
-        }
-        assert record["files"] == {
-            "count": 0,
-            "enabled": False,
-            "entries": {},
-            "order": [],
-            "total_bytes": 0,
-        }
-        assert record["deletion_status"] == {
-            "is_deleted": False,
-            "status": "P",
-        }
-        assert record["custom_fields"] == {}
-        assert record["media_files"] == {
-            "count": 0,
-            "enabled": False,
-            "entries": {},
-            "order": [],
-            "total_bytes": 0,
-        }
-        assert (
-            record["metadata"]["creators"]
-            == minimal_record_metadata["metadata"]["creators"]
-        )
-        assert (
-            record["metadata"]["publication_date"]
-            == minimal_record_metadata["metadata"]["publication_date"]
-        )
-        assert (
-            record["metadata"]["publisher"]
-            == minimal_record_metadata["metadata"]["publisher"]
-        )
+
         # Add title to resource type (updated by system after draft creation)
-        minimal_record_metadata["metadata"]["resource_type"]["title"] = {"en": "Photo"}
-        assert (
-            record["metadata"]["resource_type"]
-            == minimal_record_metadata["metadata"]["resource_type"]
-        )
-        assert not record["is_draft"]
-        assert record["is_published"]
-        assert record["links"] == build_published_record_links(
-            record["id"],
-            app.config["SITE_API_URL"],
-            app.config["SITE_UI_URL"],
-            record["parent"]["id"],
+        metadata.update_metadata(
+            {
+                "metadata|resource_type": {
+                    "id": "image-photograph",
+                    "title": {"en": "Photo"},
+                },
+            }
         )
-        assert record["parent"]["access"] == {
-            "owned_by": None,
-            "settings": {
-                "accept_conditions_text": None,
-                "allow_guest_requests": False,
-                "allow_user_requests": False,
-                "secret_link_expiration": 0,
-            },
-        }
-        assert record["parent"]["communities"] == {}
-        assert record["parent"]["id"] == record["parent"]["id"]
-        assert record["parent"]["pids"] == {
-            "doi": {
-                "client": "datacite",
-                "identifier": record["parent"]["pids"]["doi"]["identifier"],
-                "provider": "datacite",
-            },
-        }
-        assert record["pids"] == {
-            "doi": {
-                "client": "datacite",
-                "identifier": f"10.17613/{record['id']}",
-                "provider": "datacite",
-            },
-            "oai": {
-                "identifier": f"oai:{app.config['SITE_UI_URL']}:{record['id']}",
-                "provider": "oai",
-            },
-        }
+        app.logger.debug(f"metadata.metadata_in: {pformat(metadata.metadata_in)}")
+        metadata.compare_published(actual=record, by_api=True)
         assert record["revision_id"] == 3
-        assert record["stats"] == {
-            "all_versions": {
-                "data_volume": 0.0,
-                "downloads": 0,
-                "unique_downloads": 0,
-                "unique_views": 0,
-                "views": 0,
-            },
-            "this_version": {
-                "data_volume": 0.0,
-                "downloads": 0,
-                "unique_downloads": 0,
-                "unique_views": 0,
-                "views": 0,
-            },
-        }
-        assert record["status"] == "published"
-        assert record["versions"] == {"index": 1, "is_latest": True}
-        assert record["custom_fields"] == {}
 
 
 def test_records_api_endpoint_not_found(running_app):
diff --git a/site/tests/api/test_collections.py b/site/tests/api/test_collections.py
index ed0964e84..de2ad0176 100644
--- a/site/tests/api/test_collections.py
+++ b/site/tests/api/test_collections.py
@@ -17,6 +17,7 @@ def test_collection_submission_by_owner_open(
     minimal_community_factory,
     minimal_draft_record_factory,
     headers,
+    search_clear,
     celery_worker,
     mock_send_remote_api_update_fixture,
 ):
@@ -164,6 +165,7 @@ def test_collection_submission_by_curator_closed(
     minimal_community_factory,
     minimal_draft_record_factory,
     headers,
+    search_clear,
     mock_send_remote_api_update_fixture,
     celery_worker,
 ):
@@ -252,6 +254,8 @@ def test_collection_submission_by_curator_closed(
 
 def test_group_collection_read_all(
     running_app,
+    db,
+    search_clear,
     headers,
     user_factory,
     sample_communities_factory,
@@ -281,10 +285,10 @@ def test_group_collection_read_all(
         assert len(response.json["hits"]["hits"]) == 4
         assert response.json["sortBy"] == "updated-desc"
         assert response.json["links"] == {
-            "next": f"{app.config['SITE_API_URL']}/group_collections?"
+            "next": f"{app.config['SITE_API_URL']}/communities?"
             "page=2&q=%2B_exists_%3Acustom_fields.kcr%5C%3Acommons_instance%20"
             "&size=4&sort=updated-desc",
-            "self": f"{app.config['SITE_API_URL']}/group_collections?"
+            "self": f"{app.config['SITE_API_URL']}/communities?"
             "page=1&q=%2B_exists_%3Acustom_fields.kcr%5C%3Acommons_instance%20"
             "&size=4&sort=updated-desc",
         }
@@ -325,5 +329,7 @@ def test_group_collection_read_all(
 
 
 @pytest.mark.skip(reason="Not implemented")
-def test_group_collections_read_one(running_app, headers, user_factory):
+def test_group_collections_read_one(
+    running_app, db, search_clear, headers, user_factory
+):
     pass
diff --git a/site/tests/api/test_search_provisioning.py b/site/tests/api/test_search_provisioning.py
index 67dd18127..270c50d46 100644
--- a/site/tests/api/test_search_provisioning.py
+++ b/site/tests/api/test_search_provisioning.py
@@ -1,10 +1,8 @@
-import pytest
+# import pytest
 import arrow
 from invenio_access.permissions import system_identity
 from invenio_communities.proxies import current_communities
 from invenio_rdm_records.proxies import current_rdm_records
-from invenio_remote_api_provisioner.signals import remote_api_provisioning_triggered
-from invenio_queues.proxies import current_queues
 import json
 from kcworks.api_helpers import (
     format_commons_search_payload,
@@ -14,6 +12,8 @@
 from pprint import pformat
 import time
 
+from ..fixtures.records import TestRecordMetadata
+
 
 def test_trigger_search_provisioning(
     running_app,
@@ -21,7 +21,6 @@ def test_trigger_search_provisioning(
     db,
     requests_mock,
     monkeypatch,
-    minimal_record_metadata,
     user_factory,
     create_records_custom_fields,
     celery_worker,
@@ -54,15 +53,15 @@ def test_trigger_search_provisioning(
     service = current_rdm_records.records_service
 
     # Draft creation, no remote API operations should be prompted
-    draft = service.create(system_identity, minimal_record_metadata)
+    metadata = TestRecordMetadata(app=app)
+    draft = service.create(system_identity, metadata.metadata_in)
     actual_draft = draft.data
     assert actual_draft["metadata"]["title"] == "A Romans story"
     assert mock_adapter.call_count == 0
 
     # Draft edit, no remote API operations should be prompted
-    minimal_edited = minimal_record_metadata.copy()
-    minimal_edited["metadata"]["title"] = "A Romans Story 2"
-    edited_draft = service.update_draft(system_identity, draft.id, minimal_edited)
+    metadata.update_metadata({"metadata|title": "A Romans Story 2"})
+    edited_draft = service.update_draft(system_identity, draft.id, metadata.metadata_in)
     actual_edited = edited_draft.data.copy()
 
     assert actual_edited["metadata"]["title"] == "A Romans Story 2"
@@ -134,15 +133,16 @@ def test_trigger_search_provisioning(
 
     # edited draft new version
     # no remote API operation should be prompted
-    new_edited_data = new_version.data.copy()
-    new_edited_data["metadata"]["publication_date"] = arrow.now().format("YYYY-MM-DD")
-    new_edited_data["metadata"]["title"] = "A Romans Story 3"
-    # simulate the result of previous remote API operation
-    new_edited_data["custom_fields"]["kcr:commons_search_recid"] = remote_response[
-        "_id"
-    ]
+    metadata.update_metadata(
+        {
+            "metadata|title": "A Romans Story 3",
+            "metadata|publication_date": arrow.now().format("YYYY-MM-DD"),
+            # simulate the result of previous remote API operation
+            "custom_fields|kcr:commons_search_recid": remote_response["_id"],
+        }
+    )
     new_edited_version = service.update_draft(
-        system_identity, new_version.id, new_edited_data
+        system_identity, new_version.id, metadata.metadata_in
     )
     assert new_edited_version.data["metadata"]["title"] == "A Romans Story 3"
     # assert requests_mock.call_count == 1
@@ -447,9 +447,6 @@ def test_trigger_community_provisioning(
 
     # Set up mock subscriber and intercept message to callback
     monkeypatch.setenv("MOCK_SIGNAL_SUBSCRIBER", "True")
-    app.logger.debug(
-        f"app.config components: {pformat([c for c in app.config['COMMUNITIES_SERVICE_COMPONENTS']])}"
-    )
 
     # Set up mock remote API response
     rec_url = list(app.config["REMOTE_API_PROVISIONER_EVENTS"]["community"].keys())[0]
@@ -657,7 +654,6 @@ def test_trigger_community_provisioning(
 
 def test_search_id_recording_callback(
     running_app,
-    minimal_record_metadata,
     location,
     search,
     search_clear,
@@ -679,7 +675,8 @@ def test_search_id_recording_callback(
 
     # Set up minimal record to update after search provisioning
     service = current_rdm_records.records_service
-    draft = service.create(system_identity, minimal_record_metadata)
+    metadata = TestRecordMetadata(app=app)
+    draft = service.create(system_identity, metadata.metadata_in)
     read_record = service.read_draft(system_identity, draft.id)
     assert read_record.data["metadata"]["title"] == "A Romans story"
     assert read_record.data["custom_fields"].get("kcr:commons_search_recid") is None
diff --git a/site/tests/api/test_stats.py b/site/tests/api/test_stats.py
index 20f4c97d3..273fac9ec 100644
--- a/site/tests/api/test_stats.py
+++ b/site/tests/api/test_stats.py
@@ -5,32 +5,33 @@
 from invenio_stats.tasks import process_events, aggregate_events
 from invenio_rdm_records.proxies import current_rdm_records_service
 from invenio_rdm_records.records.stats.api import Statistics
-from invenio_search.proxies import current_search_client
 import pytest
 import uuid
 
+from ..fixtures.records import TestRecordMetadata
+
 
 @pytest.mark.skip("Not implemented")
-def test_stat_creation(running_app, db, search_clear, minimal_record_metadata):
-    draft = current_rdm_records_service.create(system_identity, minimal_record_metadata)
+def test_stat_creation(running_app, db, search_clear):
+    app = running_app.app
+    metadata = TestRecordMetadata(app=app)
+    draft = current_rdm_records_service.create(system_identity, metadata.metadata_in)
     published = current_rdm_records_service.publish(system_identity, draft["id"])
-    record_id = published["id"]
-    metadata_record = published["metadata"]
-    pid = published["pid"]
-    dt = arrow.now()
+    metadata.compare_published(published.to_dict())
 
 
 def test_stats_backend_processing(
     running_app,
     db,
     search_clear,
-    minimal_record_metadata,
     user_factory,
     create_stats_indices,
     celery_worker,
     mock_send_remote_api_update_fixture,
 ):
-    draft = current_rdm_records_service.create(system_identity, minimal_record_metadata)
+    app = running_app.app
+    metadata = TestRecordMetadata(app=app)
+    draft = current_rdm_records_service.create(system_identity, metadata.metadata_in)
     published = current_rdm_records_service.publish(system_identity, draft["id"])
     record_id = published.id
     metadata_record = published.to_dict()
diff --git a/site/tests/conftest.py b/site/tests/conftest.py
index 91f2b0b42..ededc1cb0 100644
--- a/site/tests/conftest.py
+++ b/site/tests/conftest.py
@@ -1,9 +1,9 @@
-from celery import Celery
-from celery.contrib.testing.worker import start_worker
 from collections import namedtuple
 import os
 from pathlib import Path
 import importlib
+import shutil
+import tempfile
 from invenio_app.factory import create_app as create_ui_api
 from invenio_queues import current_queues
 from invenio_search.proxies import current_search_client
@@ -31,6 +31,7 @@
 
 pytest_plugins = (
     "celery.contrib.pytest",
+    "tests.fixtures.files",
     "tests.fixtures.communities",
     "tests.fixtures.custom_fields",
     "tests.fixtures.records",
@@ -65,7 +66,7 @@ def _(x):
         "postgresql+psycopg2://kcworks:kcworks@localhost:5432/kcworks"
     ),
     "SQLALCHEMY_TRACK_MODIFICATIONS": False,
-    "SEARCH_INDEX_PREFIX": "",
+    "SEARCH_INDEX_PREFIX": "",  # TODO: Search index prefix triggers errors
     "POSTGRES_USER": "kcworks",
     "POSTGRES_PASSWORD": "kcworks",
     "POSTGRES_DB": "kcworks",
@@ -113,6 +114,10 @@ def _(x):
 test_config["LOGGING_FS_LEVEL"] = "DEBUG"
 test_config["LOGGING_FS_LOGFILE"] = str(log_file_path)
 test_config["CELERY_LOGFILE"] = str(log_folder_path / "celery.log")
+test_config["RECORD_IMPORTER_DATA_DIR"] = str(
+    parent_path / "helpers" / "sample_import_data"
+)
+test_config["RECORD_IMPORTER_LOGS_LOCATION"] = log_folder_path
 
 # enable DataCite DOI provider
 test_config["DATACITE_ENABLED"] = True
@@ -186,6 +191,31 @@ def celery_enable_logging():
 #         yield worker
 
 
+@pytest.fixture(scope="module")
+def location(database):
+    """Create a simple default files location for tests.
+
+    Scope: module
+
+    Use this fixture if your test requires a `files location <https://invenio-
+    files-rest.readthedocs.io/en/latest/api.html#invenio_files_rest.models.
+    Location>`_. The location will be a default location with the name
+    ``pytest-location``. Its temporary directory is removed on teardown.
+    """
+    from invenio_files_rest.models import Location
+
+    uri = tempfile.mkdtemp()
+    try:
+        location_obj = Location(name="pytest-location", uri=uri, default=True)
+        database.session.add(location_obj)
+        database.session.commit()
+
+        yield location_obj
+    finally:
+        # TODO: Submit PR to pytest-invenio to fix the below line in the stock fixture
+        shutil.rmtree(uri)
+
+
 # This is a namedtuple that holds all the fixtures we're likely to need
 # in a single test.
 RunningApp = namedtuple(
@@ -269,13 +299,18 @@ def running_app(
 @pytest.fixture(scope="function")
 def search_clear(search_clear):
     """Clear search indices after test finishes (function scope)."""
+    #     #     # current_search_client.indices.delete(index="*")
+    #     #     # current_search_client.indices.delete_template("*")
+    #     #     # list(current_search.create())
+    #     #     # list(current_search.put_templates())
+    yield search_clear
+
+    # FIXME: Resource types are getting deleted from the index after
+    # class finishes
+
     #     # current_search_client.indices.delete(index="*")
+
     #     # current_search_client.indices.delete_template("*")
-    #     # list(current_search.create())
-    #     # list(current_search.put_templates())
-    yield search_clear
-    current_search_client.indices.delete(index="*")
-    # current_search_client.indices.delete_template("*")
 
 
 @pytest.fixture(scope="module")
@@ -311,19 +346,19 @@ def app(
     app_config,
     database,
     search,
-    affiliations_v,
+    # affiliations_v,
     # awards_v,
-    community_type_v,
-    contributors_role_v,
-    creators_role_v,
-    date_type_v,
-    description_type_v,
+    # community_type_v,
+    # contributors_role_v,
+    # creators_role_v,
+    # date_type_v,
+    # description_type_v,
     # funders_v,
-    language_v,
-    licenses_v,
+    # language_v,
+    # licenses_v,
     # relation_type_v,
-    resource_type_v,
-    subject_v,
+    # resource_type_v,
+    # subject_v,
     # title_type_v,
     template_loader,
     admin_roles,
diff --git a/site/tests/fixtures/communities.py b/site/tests/fixtures/communities.py
index 4e63fcadc..70ac168cb 100644
--- a/site/tests/fixtures/communities.py
+++ b/site/tests/fixtures/communities.py
@@ -3,12 +3,25 @@
 from invenio_access.utils import get_identity
 from invenio_accounts.proxies import current_accounts
 from invenio_communities.communities.records.api import Community
+from invenio_communities.members.records.api import Member
 from invenio_communities.proxies import current_communities
 import marshmallow as ma
 import traceback
 from typing import Callable, Optional
 
 
+def make_community_member(user_id: int, role: str, community_id: str) -> None:
+    """
+    Add a user to a community with the given role, then refresh the index.
+    """
+    current_communities.service.members.add(
+        system_identity,
+        community_id,
+        data={"members": [{"type": "user", "id": str(user_id)}], "role": role},
+    )
+    Community.index.refresh()
+
+
 @pytest.fixture(scope="function")
 def communities_links_factory():
     """
diff --git a/site/tests/fixtures/files.py b/site/tests/fixtures/files.py
index 74a1d9bc7..afc4d6ad5 100644
--- a/site/tests/fixtures/files.py
+++ b/site/tests/fixtures/files.py
@@ -1,11 +1,32 @@
-import pytest
+import hashlib
+import os
 
 
-@pytest.fixture(scope="function")
-def build_file_links():
-    def _factory(record_id, base_url, upload_url):
-        return {
-            "self": f"{base_url}/records/{record_id}/draft/files",
-        }
+def file_md5(bytes_object):
+    return hashlib.md5(bytes_object).hexdigest()
 
-    return _factory
+
+def build_file_links(record_id, base_api_url, filename):
+    extension = os.path.splitext(filename)[1]
+
+    links = {
+        "content": f"{base_api_url}/records/{record_id}/files/{filename}/content",
+        "self": f"{base_api_url}/records/{record_id}/files/{filename}",
+    }
+    if extension not in [".csv", ".zip"]:
+        links.update(
+            {
+                "iiif_api": (
+                    f"{base_api_url}/iiif/record:{record_id}:{filename}/full/full/0/"
+                    "default.png"
+                ),
+                "iiif_base": f"{base_api_url}/iiif/record:{record_id}:{filename}",
+                "iiif_canvas": (
+                    f"{base_api_url}/iiif/record:{record_id}/canvas/{filename}"
+                ),
+                "iiif_info": (
+                    f"{base_api_url}/iiif/record:{record_id}:{filename}/info.json"
+                ),
+            }
+        )
+    return links
diff --git a/site/tests/fixtures/identifiers.py b/site/tests/fixtures/identifiers.py
index be335fe38..bcd8d8440 100644
--- a/site/tests/fixtures/identifiers.py
+++ b/site/tests/fixtures/identifiers.py
@@ -36,8 +36,7 @@ def _(x):
             "providers": ["datacite"],
             "required": True,
             "condition": (
-                lambda rec: rec.pids.get("doi", {}).get("provider")
-                == "datacite"
+                lambda rec: rec.pids.get("doi", {}).get("provider") == "datacite"
             ),
             "label": _("Concept DOI"),
             "validator": idutils.is_doi,
@@ -60,9 +59,7 @@ def _(x):
         providers.ExternalPIDProvider(
             "external",
             "doi",
-            validators=[
-                providers.BlockedPrefixes(config_names=["DATACITE_PREFIX"])
-            ],
+            validators=[providers.BlockedPrefixes(config_names=["DATACITE_PREFIX"])],
             label=_("DOI"),
         ),
         # OAI identifier
@@ -82,9 +79,7 @@ def _(x):
         providers.ExternalPIDProvider(
             "external",
             "doi",
-            validators=[
-                providers.BlockedPrefixes(config_names=["DATACITE_PREFIX"])
-            ],
+            validators=[providers.BlockedPrefixes(config_names=["DATACITE_PREFIX"])],
             label=_("DOI"),
         ),
         # OAI identifier
@@ -155,6 +150,16 @@ def _(x):
             "validator": always_valid,
             "datacite": "Other",
         },
+        "import-recid": {
+            "label": _("Import Record ID"),
+            "validator": always_valid,
+            "datacite": "Other",
+        },
+        "neh-recid": {
+            "label": _("NEH Record ID"),
+            "validator": always_valid,
+            "datacite": "Other",
+        },
     },
     "RDM_RECORDS_PERSONORG_SCHEMES": {
         **RDM_RECORDS_PERSONORG_SCHEMES,
@@ -178,6 +183,16 @@ def _(x):
             "validator": always_valid,
             "datacite": "Other",
         },
+        "neh_user_id": {
+            "label": _("NEH user ID"),
+            "validator": always_valid,
+            "datacite": "Other",
+        },
+        "import_user_id": {
+            "label": _("Import user ID"),
+            "validator": always_valid,
+            "datacite": "Other",
+        },
     },
     "VOCABULARIES_IDENTIFIER_SCHEMES": {
         "grid": {"label": _("GRID"), "validator": lambda x: True},
diff --git a/site/tests/fixtures/records.py b/site/tests/fixtures/records.py
index 7bc8bec80..55f551935 100644
--- a/site/tests/fixtures/records.py
+++ b/site/tests/fixtures/records.py
@@ -1,8 +1,18 @@
+import copy
+from pprint import pformat
+from flask import current_app, Flask
 import pytest
+import arrow
+from arrow import Arrow
+import datetime
 from flask_principal import Identity
 from invenio_access.permissions import system_identity
+from invenio_accounts.proxies import current_accounts
 from invenio_rdm_records.proxies import current_rdm_records_service as records_service
-from typing import Optional
+from invenio_record_importer_kcworks.utils.utils import replace_value_in_nested_dict
+from typing import Optional, Any
+from .files import build_file_links
+from .vocabularies.resource_types import RESOURCE_TYPES
 
 
 @pytest.fixture(scope="function")
@@ -10,7 +20,7 @@ def minimal_draft_record_factory(running_app, db, minimal_record_metadata):
     def _factory(
         metadata: Optional[dict] = None, identity: Optional[Identity] = None, **kwargs
     ):
-        input_metadata = metadata or minimal_record_metadata
+        input_metadata = metadata or minimal_record_metadata["in"]
         identity = identity or system_identity
         return records_service.create(identity, input_metadata)
 
@@ -22,7 +32,7 @@ def minimal_published_record_factory(running_app, db, minimal_record_metadata):
     def _factory(
         metadata: Optional[dict] = None, identity: Optional[Identity] = None, **kwargs
     ):
-        input_metadata = metadata or minimal_record_metadata
+        input_metadata = metadata or minimal_record_metadata["in"]
         identity = identity or system_identity
         draft = records_service.create(identity, input_metadata)
         return records_service.publish(identity, draft.id)
@@ -30,10 +40,208 @@ def _factory(
     return _factory
 
 
-@pytest.fixture()
-def minimal_record_metadata():
-    """Minimal record data as dict coming from the external world."""
-    return {
+def compare_metadata_draft(running_app):  # NOTE(review): not decorated as a fixture
+    app = running_app.app
+
+    def _comparison_factory(
+        actual,
+        expected,
+        community_list: Optional[list[dict]] = None,
+        now: Optional[Arrow] = None,
+    ):
+        """
+        Assert that an actual draft metadata dict matches the expected one.
+
+        ``id`` and ``parent.id`` are not checked. ``created`` and ``updated``
+        must each be less than one second older than ``now``.
+
+        Args:
+            actual (dict): The actual metadata dictionary.
+            expected (dict): The expected metadata dictionary.
+            community_list (list[dict], optional): Communities whose ids the
+                parent is expected to list. Defaults to no communities.
+            now (Arrow, optional): Reference time. Defaults to the time of the
+                call, evaluated on each call.
+
+        Returns:
+            bool: True if every check passes. An AssertionError is logged and
+            re-raised on the first failing check.
+        """
+        community_list = [] if community_list is None else community_list
+        now = arrow.utcnow() if now is None else now
+        try:
+            assert now - arrow.get(actual["created"]) < datetime.timedelta(seconds=1)
+            assert actual["custom_fields"] == {}
+            assert "expires_at" not in actual.keys()
+            assert actual["files"]["count"] == expected["files"]["count"]
+            assert actual["files"]["enabled"] == expected["files"]["enabled"]
+            for k, v in actual["files"]["entries"].items():
+                assert v["access"] == expected["files"]["entries"][k]["access"]
+                # assert v["checksum"]  # FIXME: Add checksum
+                assert v["ext"] == expected["files"]["entries"][k]["ext"]
+                assert v["key"] == expected["files"]["entries"][k]["key"]
+                assert v["mimetype"] == expected["files"]["entries"][k]["mimetype"]
+                assert v["size"] == expected["files"]["entries"][k]["size"]
+                assert (
+                    v["storage_class"]
+                    == expected["files"]["entries"][k]["storage_class"]
+                )
+                assert v["metadata"] == expected["files"]["entries"][k]["metadata"]
+                assert v["links"] == build_file_links(
+                    actual["id"], app.config["SITE_API_URL"], k
+                )
+            assert actual["files"]["order"] == expected["files"]["order"]
+            assert actual["files"]["total_bytes"] == expected["files"]["total_bytes"]
+
+            assert actual["is_draft"]
+            assert not actual["is_published"]
+            assert actual["links"] == TestRecordMetadata.build_draft_record_links(
+                actual["id"], app.config["SITE_API_URL"], app.config["SITE_UI_URL"]
+            )
+            assert actual["media_files"] == {
+                "count": 0,
+                "enabled": False,
+                "entries": {},
+                "order": [],
+                "total_bytes": 0,
+            }
+            assert actual["metadata"]["creators"] == expected["metadata"]["creators"]
+            assert (
+                actual["metadata"]["publication_date"]
+                == expected["metadata"]["publication_date"]
+            )
+            assert actual["metadata"]["publisher"] == expected["metadata"]["publisher"]
+            assert (
+                actual["metadata"]["resource_type"]
+                == expected["metadata"]["resource_type"]
+            )
+            assert actual["metadata"]["title"] == expected["metadata"]["title"]
+            assert actual["parent"]["access"] == expected["parent"]["access"]
+            assert actual["parent"]["communities"]["ids"] == [
+                c["id"] for c in community_list
+            ]
+            assert actual["pids"] == {
+                "doi": {
+                    "client": "datacite",
+                    "identifier": f"10.17613/{actual['id']}",
+                    "provider": "datacite",
+                },
+                "oai": {
+                    "identifier": f"oai:{app.config['SITE_UI_URL']}:{actual['id']}",
+                    "provider": "oai",
+                },
+            }
+            assert actual["revision_id"] == 3
+            assert actual["stats"] == expected["stats"]
+            assert actual["status"] == "draft"
+            assert now - arrow.get(actual["updated"]) < datetime.timedelta(seconds=1)
+            assert actual["versions"] == expected["versions"]
+            return True
+        except AssertionError as e:
+            app.logger.error(f"Assertion failed: {e}")
+            raise
+
+    return _comparison_factory
+
+
+@pytest.fixture(scope="function")
+def record_metadata(running_app):  # returns a TestRecordMetadata factory
+    def _factory(
+        metadata_in: Optional[dict] = None,
+        app: Flask = current_app,  # ignored: running_app.app is always used
+        community_list: Optional[list[dict]] = None,
+        file_entries: Optional[dict] = None,
+        owner_id: Optional[str] = "1",
+    ):
+        return TestRecordMetadata(
+            metadata_in={} if metadata_in is None else metadata_in,
+            app=running_app.app,
+            community_list=[] if community_list is None else community_list,
+            file_entries={} if file_entries is None else file_entries,
+            owner_id=owner_id,
+        )
+
+    return _factory
+
+
+class TestRecordMetadata:
+    """TestRecordMetadata is a utility class for mocking metadata for a record.
+
+    Given a metadata dictionary like the one required for record creation, an
+    instance of this class provides several versions of the metadata:
+
+    - `metadata_in` (property): The original metadata submitted for record creation.
+    - `draft` (property): The metadata as it appears in the record draft.
+    - `published` (property): The metadata as it appears in the published record.
+
+    The `metadata_in` property can be updated with new values via the `update_metadata`
+    method. The updates will be reflected in the `draft` and `published` metadata
+    properties.
+
+    The `draft` and `published` properties are read-only.
+
+    The class also provides comparison methods to check whether a given metadata
+    dictionary matches the expected metadata for a draft or published record.
+    - `compare_draft`
+    - `compare_published`
+
+    This class is intended to be used in conjunction with the function-scoped
+    `record_metadata` fixture, which will create a new instance of this class
+    for each test function.
+
+    Usage example:
+
+    ```python
+    def my_test_function(record_metadata):
+        test_metadata = record_metadata(
+            metadata_in={
+                "title": "Old Title",
+            },
+            community_list=[],
+            file_entries={},
+            owner_id="1",
+        )
+
+        # Update the input metadata on the fly.
+        test_metadata.update_metadata({"title": "New Title"})
+        assert test_metadata.draft["title"] == "New Title"
+        assert test_metadata.published["title"] == "New Title"
+
+        # Get the draft and published metadata as dictionaries.
+        metadata_out_draft = test_metadata.draft
+        metadata_out_published = test_metadata.published
+
+        # Use the compare methods to check whether draft and published metadata
+        # from test operations match the expected metadata.
+        # Note that you don't need to pass in the expected metadata as a dictionary,
+        # just the actual metadata.
+        test_metadata.compare_draft(my_draft_dict_to_test)
+        test_metadata.compare_published(my_published_dict_to_test)
+
+        # Compare actual metadata dictionaries with expected metadata dictionaries
+        # with variations seen in REST API results.
+        test_metadata.compare_draft_via_api(my_draft_dict_to_test, by_api=True)
+        test_metadata.compare_published_via_api(my_published_dict_to_test, by_api=True)
+    ```
+
+    The input metadata dictionary can include the distinctive content used in the
+    streamlined import API. For example:
+
+    ```python
+    metadata_in={
+        "parent": {
+            "access": {
+                "owned_by": [
+                    {"email": "test@example.com"},
+                    {"email": "test2@example.com"},
+                ]
+            },
+        },
+    }
+    ```
+    """
+
+    default_metadata_in: dict = {
         "pids": {},
         "access": {
             "record": "public",
@@ -67,6 +275,642 @@ def minimal_record_metadata():
         },
     }
 
+    def __init__(
+        self,
+        metadata_in: dict = {},
+        app: Flask = current_app,
+        community_list: list[dict] = [],
+        file_entries: dict = {},
+        owner_id: Optional[str] = "1",
+    ):
+        """Initialize the TestRecordMetadata object.
+
+        Args:
+            metadata_in (dict): The record metadata; deep-copied so in-place
+                updates never leak to the caller. Defaults to `default_metadata_in`.
+            app (Flask, optional): The Flask application. Defaults to current_app.
+            community_list (list[dict], optional): The list of communities, each
+                a dict with the keys: id, access, children, custom_fields,
+                deletion_status, links, metadata, revision_id, slug, updated.
+            file_entries (dict, optional): File entries keyed by file key.
+            owner_id (str, optional): The record owner ID. Defaults to "1".
+        """
+        self.app = app
+        source = metadata_in if metadata_in else TestRecordMetadata.default_metadata_in
+        self._metadata_in: dict = copy.deepcopy(source)
+        self.community_list = community_list
+        self.file_entries = file_entries
+        self.owner_id = owner_id
+
+    def update_metadata(self, metadata_updates: dict[str, Any] = {}) -> None:
+        """
+        Apply path-addressed updates to the record's input metadata.
+
+        Args:
+            metadata_updates (dict): Maps bar-separated (NOT dot-separated)
+            key paths to the new values to set at those paths.
+        """
+        for path, new_value in metadata_updates.items():
+            updated = replace_value_in_nested_dict(self.metadata_in, path, new_value)
+            # Only adopt the helper's result when it is a dictionary; otherwise
+            # fall back to the current metadata.
+            if isinstance(updated, dict):
+                self._metadata_in = updated
+            else:
+                self._metadata_in = self.metadata_in
+
+    @property
+    def metadata_in(self) -> dict:
+        """Input metadata for the record, with file uploads switched off.
+
+        Every access resets `files` to `{"enabled": False}` in place.
+        """
+        self._metadata_in.update({"files": {"enabled": False}})
+        return self._metadata_in
+
+    @staticmethod
+    def build_draft_record_links(record_id, base_url, ui_base_url):
+        """Build the dictionary of links expected for a draft record."""
+        record_api = f"{base_url}/records/{record_id}"
+        draft_api = f"{record_api}/draft"
+        iiif_api = f"{base_url}/iiif/draft:{record_id}"
+        return {
+            "self": draft_api,
+            "self_html": f"{ui_base_url}/uploads/{record_id}",
+            "self_iiif_manifest": f"{iiif_api}/manifest",
+            "self_iiif_sequence": f"{iiif_api}/sequence/default",
+            "files": f"{draft_api}/files",
+            "media_files": f"{draft_api}/media-files",
+            "archive": f"{draft_api}/files-archive",
+            "archive_media": f"{draft_api}/media-files-archive",
+            "record": record_api,
+            "record_html": f"{ui_base_url}/records/{record_id}",
+            "publish": f"{draft_api}/actions/publish",
+            "review": f"{draft_api}/review",
+            "versions": f"{record_api}/versions",
+            "access_links": f"{record_api}/access/links",
+            "access_grants": f"{record_api}/access/grants",
+            "access_users": f"{record_api}/access/users",
+            "access_groups": f"{record_api}/access/groups",
+            "access_request": f"{record_api}/access/request",
+            "access": f"{record_api}/access",
+            "reserve_doi": f"{draft_api}/pids/doi",
+            "communities": f"{record_api}/communities",
+            "communities-suggestions": f"{record_api}/communities-suggestions",
+            "requests": f"{record_api}/requests",
+        }
+
+    @staticmethod
+    def build_published_record_links(
+        record_id: str,
+        base_url: str,
+        ui_base_url: str,
+        parent_id: str,
+        record_doi: str = "",
+    ):
+        """Build the links expected for a published record from the draft links."""
+        record_doi = record_doi or f"10.17613/{record_id}"
+        record_api = f"{base_url}/records/{record_id}"
+        iiif_api = f"{base_url}/iiif/record:{record_id}"
+        links = TestRecordMetadata.build_draft_record_links(
+            record_id, base_url, ui_base_url
+        )
+        for draft_only_key in ("publish", "record", "record_html", "review"):
+            del links[draft_only_key]
+        links.update(
+            {
+                "archive": f"{record_api}/files-archive",
+                "archive_media": f"{record_api}/media-files-archive",
+                "doi": f"https://handle.stage.datacite.org/{record_doi}",
+                "draft": f"{record_api}/draft",
+                "files": f"{record_api}/files",
+                "latest": f"{record_api}/versions/latest",
+                "latest_html": f"{ui_base_url}/records/{record_id}/latest",
+                "media_files": f"{record_api}/media-files",
+                "parent": f"{base_url}/records/{parent_id}",
+                "parent_doi": f"{ui_base_url}/doi/10.17613/{parent_id}",
+                "parent_html": f"{ui_base_url}/records/{parent_id}",
+                "self": record_api,
+                "self_html": f"{ui_base_url}/records/{record_id}",
+                "self_doi": f"{ui_base_url}/doi/{record_doi}",
+                "self_iiif_manifest": f"{iiif_api}/manifest",
+                "self_iiif_sequence": f"{iiif_api}/sequence/default",
+            }
+        )
+        return links
+
+    @property
+    def draft(self):
+        """Expected draft metadata built from `metadata_in` plus fixed defaults.
+
+        Adds placeholder pids, stats, versions, files and parent/community data.
+        """
+        metadata_out_draft = copy.deepcopy(self.metadata_in)
+        metadata_out_draft.get("access", {})["embargo"] = {  # lost if no "access"
+            "active": False,
+            "reason": None,
+        }
+        metadata_out_draft.get("access", {})["status"] = "metadata-only"
+        metadata_out_draft["deletion_status"] = {"is_deleted": False, "status": "P"}
+        metadata_out_draft["custom_fields"] = self.metadata_in.get("custom_fields", {})
+        metadata_out_draft["is_draft"] = True
+        metadata_out_draft["is_published"] = False
+        current_resource_type = [
+            t
+            for t in copy.deepcopy(RESOURCE_TYPES)
+            if t["id"] == metadata_out_draft["metadata"]["resource_type"]["id"]
+        ][0]
+        metadata_out_draft["metadata"]["resource_type"]["title"] = (
+            current_resource_type["title"]
+        )
+        metadata_out_draft["versions"] = {
+            "index": 1,
+            "is_latest": False,
+            "is_latest_draft": True,
+        }
+        metadata_out_draft["media_files"] = {
+            "count": 0,
+            "enabled": False,
+            "entries": {},
+            "order": [],
+            "total_bytes": 0,
+        }
+        metadata_out_draft["files"] = {
+            "count": 0,
+            "enabled": False,
+            "entries": {},
+            "order": [],
+            "total_bytes": 0,
+            **metadata_out_draft["files"],  # For inheritance
+        }
+        metadata_out_draft["parent"] = {
+            "access": {
+                "grants": [],
+                "links": [],
+                "owned_by": {"user": "1"},  # fixed value, not self.owner_id
+                "settings": {
+                    "accept_conditions_text": None,
+                    "allow_guest_requests": False,
+                    "allow_user_requests": False,
+                    "secret_link_expiration": 0,
+                },
+            },
+            "communities": {
+                "default": "215de947-a24d-4255-973c-25306e19a0aa",
+                "entries": [],
+                "ids": ["215de947-a24d-4255-973c-25306e19a0aa"],
+                "id": "74wky-xv103",  # NOTE(review): nested in "communities" - confirm
+                "pids": {
+                    "doi": {
+                        "client": "datacite",
+                        "identifier": "10.17613/74wky-xv103",
+                        "provider": "datacite",
+                    }
+                },
+            },
+        }
+        for c in self.community_list:
+            defaults = {
+                "access": {
+                    "member_policy": "open",
+                    "members_visibility": "public",
+                    "record_policy": "open",
+                    "review_policy": "open",
+                    "visibility": "public",
+                },
+                "children": {"allow": False},
+                "created": "",
+                "custom_fields": {},
+                "deletion_status": {"is_deleted": False, "status": "P"},
+                "id": c["id"],
+                "links": {},
+                "metadata": {
+                    "curation_policy": c["metadata"].get("curation_policy", ""),
+                    "description": c["metadata"].get("description", ""),
+                    "organizations": [{"name": ""}],
+                    "page": c["metadata"].get("page", ""),
+                    "title": c["metadata"].get("title", ""),
+                    "type": {"id": c["metadata"].get("type", "")},
+                    "website": c["metadata"].get("website", ""),
+                },
+                "revision_id": 2,
+                "slug": c["slug"],
+                "updated": "",
+            }
+            defaults.update(c)  # values supplied by the community win
+            metadata_out_draft["parent"]["communities"]["entries"].append(defaults)
+
+        metadata_out_draft["pids"] = {
+            "doi": {
+                "client": "datacite",
+                "identifier": "10.17613/XXXX",
+                "provider": "datacite",
+            },
+            "oai": {
+                "identifier": f"oai:{self.app.config['SITE_UI_URL']}:XXXX",
+                "provider": "oai",
+            },
+        }
+        metadata_out_draft["revision_id"] = 3
+        metadata_out_draft["stats"] = {
+            "all_versions": {
+                "data_volume": 0.0,
+                "downloads": 0,
+                "unique_downloads": 0,
+                "unique_views": 0,
+                "views": 0,
+            },
+            "this_version": {
+                "data_volume": 0.0,
+                "downloads": 0,
+                "unique_downloads": 0,
+                "unique_views": 0,
+                "views": 0,
+            },
+        }
+        metadata_out_draft["status"] = "draft"
+        metadata_out_draft["updated"] = ""
+        return metadata_out_draft
+
+    @property
+    def published(self):
+        """Expected published metadata, derived from `draft`.
+
+        Import-style owner lists are resolved to users by email.
+        """
+        result = copy.deepcopy(self.draft)
+        result.update(
+            {
+                "is_draft": False,
+                "is_published": True,
+                "versions": {"index": 1, "is_latest": True, "is_latest_draft": True},
+            }
+        )
+        parent_in = self.metadata_in.get("parent", {})
+        owners_in = parent_in.get("access", {}).get("owned_by")
+        if not isinstance(owners_in, list):  # Only imports supply a list of dicts
+            return result
+        owner_users = [
+            current_accounts.datastore.get_user_by_email(owner["email"])
+            for owner in owners_in
+        ]
+        access_out = result["parent"]["access"]
+        access_out["owned_by"] = (
+            {"user": str(owner_users[0].id)} if owner_users else None
+        )
+        if len(owner_users) > 1:
+            access_out["grants"] = [
+                {
+                    "origin": None,
+                    "subject": {"id": str(extra.id), "type": "user"},
+                    "permission": "manage",
+                }
+                for extra in owner_users[1:]
+            ]
+        return result
+
+    def __str__(self):  # Pretty-printed view of the input metadata
+        return pformat(self.metadata_in)
+
+    def __repr__(self):  # Same text as the string form
+        return str(self)
+
+    def compare_draft(self, metadata_out_draft):  # Raises AssertionError on mismatch
+        assert self.draft == metadata_out_draft
+
+    def _as_via_api(self, metadata_in: dict) -> dict:  # mutates and returns its input
+        parent_access = metadata_in["parent"]["access"]
+        del parent_access["grants"], parent_access["links"]
+        del metadata_in["versions"]["is_latest_draft"]
+        return metadata_in
+
+    def compare_published(
+        self,
+        actual: dict,
+        expected: dict = {},
+        by_api: bool = False,
+        now: Optional[Arrow] = None,
+    ) -> bool:
+        """
+        Compare an actual published-record dictionary with the expected one.
+
+        Not checked: id, parent.id, revision_id.
+        Only compared to the present time: created, updated.
+        Mismatches are logged to the app logger before being re-raised.
+
+        Args:
+            actual (dict): The actual metadata dictionary.
+            expected (dict): The expected metadata dictionary. Defaults to
+                `self.published`. A supplied dictionary is copied first, so
+                the caller's object is never modified.
+            by_api (bool): If True, drop `parent.access.grants`,
+                `parent.access.links` and `versions.is_latest_draft` from the
+                expected metadata before comparing.
+            now (Arrow, optional): The reference time. Defaults to the time of
+                the call (a default evaluated once at definition would be stale).
+
+        Raises:
+            AssertionError: If the actual metadata dictionary does not match
+                the expected metadata dictionary.
+
+        Returns:
+            bool: True if the dictionaries match; a mismatch always raises.
+        """
+        app = self.app
+        now = arrow.utcnow() if now is None else now
+        expected = copy.deepcopy(expected) if expected else self.published
+
+        if by_api:
+            expected = self._as_via_api(expected)
+        try:
+            assert now - arrow.get(actual["created"]) < datetime.timedelta(seconds=1)
+            assert actual["custom_fields"] == expected["custom_fields"]
+            assert "expires_at" not in actual.keys()
+            assert actual["files"]["count"] == expected["files"]["count"]
+            assert actual["files"]["enabled"] == expected["files"]["enabled"]
+            for k, v in actual["files"]["entries"].items():
+                assert v["access"] == expected["files"]["entries"][k]["access"]
+                if "checksum" in expected["files"]["entries"][k]:
+                    assert v["checksum"] == expected["files"]["entries"][k]["checksum"]
+                assert v["ext"] == expected["files"]["entries"][k]["ext"]
+                assert v["key"] == expected["files"]["entries"][k]["key"]
+                assert v["mimetype"] == expected["files"]["entries"][k]["mimetype"]
+                assert v["size"] == expected["files"]["entries"][k]["size"]
+                assert (
+                    v["storage_class"]
+                    == expected["files"]["entries"][k]["storage_class"]
+                )
+                if v["metadata"]:
+                    assert v["metadata"] == expected["files"]["entries"][k]["metadata"]
+                else:
+                    assert not expected["files"]["entries"][k]["metadata"]
+                assert v["links"] == build_file_links(
+                    actual["id"], app.config["SITE_API_URL"], k
+                )
+            assert actual["files"]["order"] == expected["files"]["order"]
+            assert actual["files"]["total_bytes"] == expected["files"]["total_bytes"]
+
+            assert not actual["is_draft"]
+            assert actual["is_published"]
+            assert actual["links"] == TestRecordMetadata.build_published_record_links(
+                actual["id"],
+                app.config["SITE_API_URL"],
+                app.config["SITE_UI_URL"],
+                actual["parent"]["id"],
+                actual["pids"]["doi"]["identifier"],
+            )
+            assert actual["media_files"] == {
+                "count": 0,
+                "enabled": False,
+                "entries": {},
+                "order": [],
+                "total_bytes": 0,
+            }
+            assert actual["metadata"]["creators"] == expected["metadata"]["creators"]
+            assert (
+                actual["metadata"]["publication_date"]
+                == expected["metadata"]["publication_date"]
+            )
+            assert actual["metadata"]["publisher"] == expected["metadata"]["publisher"]
+            assert (
+                actual["metadata"]["resource_type"]
+                == expected["metadata"]["resource_type"]
+            )
+            assert actual["metadata"]["title"] == expected["metadata"]["title"]
+
+            expected["parent"]["access"]["owned_by"] = (
+                {"user": str(self.owner_id)} if self.owner_id else None
+            )
+
+            assert actual["parent"]["access"] == expected["parent"]["access"]
+            if self.community_list:
+                assert len(actual["parent"]["communities"]["entries"]) == len(
+                    self.community_list
+                )
+                assert (
+                    actual["parent"]["communities"]["default"]
+                    == self.community_list[0]["id"]
+                )
+
+                for community in self.community_list:
+                    actual_c = [
+                        c
+                        for c in actual["parent"]["communities"]["entries"]
+                        if c["id"] == community["id"]
+                    ][0]
+                    assert actual_c["access"] == community["access"]
+                    assert actual_c["children"] == community["children"]
+                    assert actual_c["created"] == community["created"]
+                    assert actual_c["custom_fields"] == community["custom_fields"]
+                    assert actual_c["deletion_status"] == community["deletion_status"]
+                    assert actual_c["id"] == community["id"]
+                    assert actual_c["links"] == {}
+                    if (
+                        "title" in community["metadata"]["type"]
+                    ):  # expansion inconsistent
+                        community["metadata"]["type"].pop("title")
+                    assert actual_c["metadata"] == community["metadata"]
+                    assert actual_c["revision_id"] == community["revision_id"]
+                    assert actual_c["slug"] == community["slug"]
+                    assert actual_c["updated"] == community["updated"]
+                assert actual["parent"]["communities"]["ids"] == [
+                    c["id"] for c in self.community_list
+                ]
+                assert actual["parent"]["pids"] == {
+                    "doi": {
+                        "client": "datacite",
+                        "identifier": (f"10.17613/{actual['parent']['id']}"),
+                        "provider": "datacite",
+                    },
+                }
+            expected_pids = {
+                "doi": {
+                    "client": "datacite",
+                    "identifier": actual["pids"]["doi"]["identifier"],
+                    "provider": "datacite",
+                },
+                "oai": {
+                    "identifier": f"oai:{app.config['SITE_UI_URL']}:{actual['id']}",
+                    "provider": "oai",
+                },
+            }
+            try:
+                assert actual["pids"] == expected_pids
+            except AssertionError as e:
+                expected_pids["oai"]["identifier"] = expected_pids["oai"][
+                    "identifier"
+                ].replace(
+                    app.config["SITE_UI_URL"], "https://localhost:5000"
+                )  # 127.0.0.1 is not always working in tests
+                app.logger.error(f"Assertion failed: {e}")
+                assert actual["pids"] == expected_pids
+            # assert actual["revision_id"] == 4  # NOTE: Too difficult to test
+            assert actual["stats"] == expected["stats"]
+            assert actual["status"] == "published"
+            assert now - arrow.get(actual["updated"]) < datetime.timedelta(seconds=1)
+            assert actual["versions"] == expected["versions"]
+            return True
+        except AssertionError as e:
+            app.logger.error(f"Assertion failed: {e}")
+            raise
+
+
+@pytest.fixture(scope="function")
+def record_metadata_with_files(running_app):
+    """Provide a factory that builds `TestRecordMetadataWithFiles` objects."""
+
+    def _factory(
+        metadata_in: dict = {},
+        app: Flask = current_app,
+        community_list: list[dict] = [],
+        file_entries: dict = {},
+        owner_id: Optional[str] = "1",
+    ):
+        # NOTE: the `app` argument is not forwarded; the app of the `running_app`
+        # fixture is always used instead.
+        factory_kwargs = dict(metadata_in=metadata_in, community_list=community_list)
+        factory_kwargs.update(file_entries=file_entries, owner_id=owner_id)
+        return TestRecordMetadataWithFiles(app=running_app.app, **factory_kwargs)
+
+    return _factory
+
+
+class TestRecordMetadataWithFiles(TestRecordMetadata):
+    """
+    This class extends the TestRecordMetadata class for records with files
+
+    In addition to the usual instantiation arguments, the `file_entries` argument
+    can be used to provide a dictionary of file entries shaped like the
+    `files` section of the streamlined import API. For example:
+
+    ```python
+    file_entries={
+        "file1": {"mimetype": "text/plain", "size": 100},
+        "file2": {"mimetype": "text/plain", "size": 200},
+    }
+    ```
+
+    The `file_access_status` argument sets the files' access status (default: "open").
+    """
+
+    def __init__(
+        self,
+        app: Flask = current_app,
+        record_id: str = "XXXX",
+        metadata_in: dict = {},
+        community_list: list[dict] = [],
+        file_access_status: str = "open",
+        file_entries: dict = {},
+        owner_id: str = "1",
+    ):
+        """Set up the metadata; the parent copies the defaults when none is given."""
+        super().__init__(
+            metadata_in=metadata_in,
+            app=app,
+            community_list=community_list,
+            file_entries=file_entries,
+            owner_id=owner_id,
+        )
+        self.record_id = record_id
+        self.file_access_status = file_access_status
+
+    @property
+    def metadata_in(self):
+        """Input metadata with files enabled and the file entries attached."""
+        files = self._metadata_in.setdefault("files", {})
+        files["enabled"] = True
+        files["entries"] = self.file_entries
+        self._metadata_in.get("access", {})["status"] = self.file_access_status
+        return self._metadata_in
+
+    def _add_file_entries(self, metadata):
+        """Fill `metadata["files"]` with the expected count, size and entries."""
+        metadata["files"]["count"] = len(self.file_entries)
+        metadata["files"]["total_bytes"] = sum(
+            e.get("size", 0) for e in self.file_entries.values()
+        )
+        metadata["files"]["order"] = []
+        for k, e in self.file_entries.items():
+            file_links = build_file_links(
+                self.record_id, self.app.config["SITE_API_URL"], k
+            )
+            defaults = {
+                "access": {"hidden": False},
+                "ext": k[-3:],
+                "metadata": {},
+                "mimetype": e["mimetype"],
+                "key": k,
+                "size": 0,
+                "storage_class": "L",
+                "links": file_links,
+                "id": "XXXX",
+            }
+            metadata["files"]["entries"][k] = {
+                **defaults,
+                **e,
+            }
+            # because sometimes e["links"] is from prior run without record_id...
+            if e.get("links") and "XXXX" in e["links"]["content"]:
+                metadata["files"]["entries"][k]["links"] = file_links
+        return metadata
+
+    @property
+    def draft(self):
+        """Parent draft metadata with the file entries filled in."""
+        return self._add_file_entries(super().draft)
+
+    @property
+    def published(self):
+        """Parent published metadata with the file entries filled in."""
+        return self._add_file_entries(super().published)
+
+
+@pytest.fixture(scope="function")
+def minimal_record_metadata(running_app):
+    """Minimal record metadata in its input, draft and published forms.
+
+    Returns a dict with the keys "in", "draft" and "published".
+
+    Fields that can't be set before record creation:
+
+    created
+    id
+    updated
+    pids
+    parent.pids
+    parent.id
+    """
+    record = TestRecordMetadata(app=running_app.app)
+    forms = {}
+    forms["in"] = record.metadata_in
+    forms["draft"] = record.draft
+    forms["published"] = record.published
+    return forms
+
+
+@pytest.fixture(scope="function")
+def minimal_record_metadata_with_files(running_app):
+    """Factory for minimal record metadata ("in", "draft", "published") with files."""
+    app = running_app.app
+
+    def _factory(
+        record_id: str = "XXXX", entries: dict = {}, access_status: str = "open"
+    ):
+        record = TestRecordMetadataWithFiles(
+            app=app,
+            record_id=record_id,
+            file_entries=entries,
+            file_access_status=access_status,
+        )
+        # Expose the input, draft and published forms side by side.
+        forms = {}
+        forms["in"] = record.metadata_in
+        forms["draft"] = record.draft
+        forms["published"] = record.published
+        return forms
+
+    return _factory
+
 
 @pytest.fixture(scope="function")
 def full_record_metadata(users):
@@ -246,71 +1090,3 @@ def full_record_metadata(users):
         },
         "notes": ["Under investigation for copyright infringement."],
     }
-
-
-@pytest.fixture(scope="function")
-def build_draft_record_links():
-    def _factory(record_id, base_url, ui_base_url):
-        return {
-            "self": f"{base_url}/records/{record_id}/draft",
-            "self_html": f"{ui_base_url}/uploads/{record_id}",
-            "self_iiif_manifest": f"{base_url}/iiif/draft:{record_id}/manifest",
-            "self_iiif_sequence": f"{base_url}/iiif/draft:{record_id}/sequence/default",
-            "files": f"{base_url}/records/{record_id}/draft/files",
-            "media_files": f"{base_url}/records/{record_id}/draft/media-files",
-            "archive": f"{base_url}/records/{record_id}/draft/files-archive",
-            "archive_media": (
-                f"{base_url}/records/{record_id}/draft/media-files-archive"
-            ),
-            "record": f"{base_url}/records/{record_id}",
-            "record_html": f"{ui_base_url}/records/{record_id}",
-            "publish": f"{base_url}/records/{record_id}/draft/actions/publish",
-            "review": f"{base_url}/records/{record_id}/draft/review",
-            "versions": f"{base_url}/records/{record_id}/versions",
-            "access_links": f"{base_url}/records/{record_id}/access/links",
-            "access_grants": f"{base_url}/records/{record_id}/access/grants",
-            "access_users": f"{base_url}/records/{record_id}/access/users",
-            "access_groups": f"{base_url}/records/{record_id}/access/groups",
-            "access_request": f"{base_url}/records/{record_id}/access/request",
-            "access": f"{base_url}/records/{record_id}/access",
-            "reserve_doi": f"{base_url}/records/{record_id}/draft/pids/doi",
-            "communities": f"{base_url}/records/{record_id}/communities",
-            "communities-suggestions": (
-                f"{base_url}/records/{record_id}/communities-suggestions"
-            ),
-            "requests": f"{base_url}/records/{record_id}/requests",
-        }
-
-    return _factory
-
-
-@pytest.fixture(scope="function")
-def build_published_record_links(build_draft_record_links):
-    def _factory(record_id, base_url, ui_base_url, parent_id):
-        links = build_draft_record_links(record_id, base_url, ui_base_url)
-        links["archive"] = f"{base_url}/records/{record_id}/files-archive"
-        links["archive_media"] = f"{base_url}/records/{record_id}/media-files-archive"
-        links["doi"] = f"https://handle.stage.datacite.org/10.17613/{record_id}"
-        links["draft"] = f"{base_url}/records/{record_id}/draft"
-        links["files"] = f"{base_url}/records/{record_id}/files"
-        links["latest"] = f"{base_url}/records/{record_id}/versions/latest"
-        links["latest_html"] = f"{ui_base_url}/records/{record_id}/latest"
-        links["media_files"] = f"{base_url}/records/{record_id}/media-files"
-        del links["publish"]
-        del links["record"]
-        del links["record_html"]
-        links["parent"] = f"{base_url}/records/{parent_id}"
-        links["parent_doi"] = f"{ui_base_url}/doi/10.17613/{parent_id}"
-        links["parent_html"] = f"{ui_base_url}/records/{parent_id}"
-        del links["review"]
-        links["self"] = f"{base_url}/records/{record_id}"
-        links["self_html"] = f"{ui_base_url}/records/{record_id}"
-        links["self_doi"] = f"{ui_base_url}/doi/10.17613/{record_id}"
-        links["self_iiif_manifest"] = f"{base_url}/iiif/record:{record_id}/manifest"
-        links["self_iiif_sequence"] = (
-            f"{base_url}/iiif/record:{record_id}/sequence/default"
-        )
-
-        return links
-
-    return _factory
diff --git a/site/tests/fixtures/saml.py b/site/tests/fixtures/saml.py
index aff988dba..f3a538759 100644
--- a/site/tests/fixtures/saml.py
+++ b/site/tests/fixtures/saml.py
@@ -168,6 +168,7 @@
                 "profile": {
                     "username": "knowledgeCommons-joanjett",
                     "full_name": "Joan Jett",
+                    "identifier_kc_username": "joanjett",
                 },
             },
             "external_id": "joanjett",
@@ -206,6 +207,9 @@
                     "username": "knowledgeCommons-user1",
                     "full_name": "User Number One",
                     "affiliations": "Independent Scholar",
+                    # Added from api call
+                    "identifier_orcid": "0000-0002-1825-0097",  # official dummy orcid
+                    "identifier_kc_username": "user1",
                 },
             },
             "external_id": "user1",
@@ -240,6 +244,8 @@
                     "username": "knowledgeCommons-janedoe",
                     "full_name": "Jane Doe",
                     "affiliations": "College Of Human Medicine",
+                    "identifier_orcid": "0000-0002-1825-0097",
+                    "identifier_kc_username": "janedoe",
                 },
             },
             "external_id": "janedoe",
@@ -290,8 +296,9 @@
                 "email": None,  # FIXME: Unobfuscated email not sent by
                 # KC because no email marked as official
                 "profile": {
-                    "full_name": "Ghost Hc",
                     "username": "knowledgeCommons-gihctester",
+                    "full_name": "Ghost Hc",
+                    "identifier_kc_username": "gihctester",
                 },
             },
             "external_id": "gihctester",
@@ -326,8 +333,10 @@
             "user": {
                 "email": "jrghosttester@email.ghostinspector.com",
                 "profile": {
-                    "full_name": "Ghost Tester",
                     "username": "knowledgeCommons-ghostrjtester",
+                    "full_name": "Ghost Tester",
+                    "identifier_orcid": "0000-0002-1825-0097",
+                    "identifier_kc_username": "ghostrjtester",
                 },
             },
             "external_id": "ghostrjtester",
diff --git a/site/tests/fixtures/users.py b/site/tests/fixtures/users.py
index 102070892..e27fd9735 100644
--- a/site/tests/fixtures/users.py
+++ b/site/tests/fixtures/users.py
@@ -1,10 +1,12 @@
 from typing import Callable, Optional, Union
+from flask import current_app
 from flask_login import login_user
 from flask_principal import Identity
 from flask_security.utils import hash_password
 from invenio_access.models import ActionRoles, Role
 from invenio_access.permissions import superuser_access
 from invenio_accounts.models import User
+from invenio_accounts.proxies import current_accounts
 from invenio_accounts.testutils import login_user_via_session
 from invenio_administration.permissions import administration_access_action
 from invenio_oauthclient.models import UserIdentity
@@ -88,6 +90,8 @@ def make_user(
         admin: bool = False,
         saml_src: Optional[str] = "knowledgeCommons",
         saml_id: Optional[str] = "myuser",
+        orcid: Optional[str] = "",
+        kc_username: Optional[str] = "",
         new_remote_data: dict = {},
     ) -> AugmentedUserFixture:
         """Create a user.
@@ -116,6 +120,9 @@ def make_user(
         # Mock the remote api call.
         mock_adapter = mock_user_data_api(saml_id, mock_remote_data)
 
+        if not orcid and new_remote_data.get("orcid"):
+            orcid = new_remote_data.get("orcid")
+
         u = AugmentedUserFixture(
             email=email,
             password=hash_password(password),
@@ -134,10 +141,25 @@ def make_user(
             )
             datastore.add_role_to_user(u.user, role)
 
-        if saml_src and saml_id:
+        if u.user and orcid:
+            profile = u.user.user_profile
+            profile["identifier_orcid"] = orcid
+            u.user.user_profile = profile
+
+        if u.user and kc_username:
+            profile = u.user.user_profile
+            profile["identifier_kc_username"] = kc_username
+            u.user.user_profile = profile
+
+        if u.user and saml_src and saml_id:
+            u.user.username = f"{saml_src}-{saml_id}"
+            profile = u.user.user_profile
+            profile["identifier_kc_username"] = saml_id
+            u.user.user_profile = profile
             UserIdentity.create(u.user, saml_src, saml_id)
             u.mock_adapter = mock_adapter
 
+        current_accounts.datastore.commit()
         db.session.commit()
 
         return u
diff --git a/site/tests/fixtures/vocabularies/resource_types.py b/site/tests/fixtures/vocabularies/resource_types.py
index 728d8dc32..2e2119057 100644
--- a/site/tests/fixtures/vocabularies/resource_types.py
+++ b/site/tests/fixtures/vocabularies/resource_types.py
@@ -1,6 +1,8 @@
+import copy
 import pytest
 
 from invenio_access.permissions import system_identity
+from invenio_pidstore.errors import PIDAlreadyExists
 from invenio_vocabularies.records.api import Vocabulary
 from invenio_vocabularies.proxies import current_service as vocabulary_service
 
@@ -8,9 +10,7 @@
 @pytest.fixture(scope="module")
 def resource_type_type(app):
     """Resource type vocabulary type."""
-    return vocabulary_service.create_type(
-        system_identity, "resourcetypes", "rsrct"
-    )
+    return vocabulary_service.create_type(system_identity, "resourcetypes", "rsrct")
 
 
 RESOURCE_TYPES = [
@@ -252,9 +252,14 @@ def resource_type_type(app):
 
 
 @pytest.fixture(scope="module")
-def resource_type_v(app, resource_type_type):
+def resource_types():
+    return copy.deepcopy(RESOURCE_TYPES)
+
+
+@pytest.fixture(scope="module")
+def resource_type_v(app, resource_type_type, resource_types):
     """Resource type vocabulary record."""
-    for resource_type in RESOURCE_TYPES:
+    for resource_type in resource_types:
         vocabulary_service.create(system_identity, resource_type)
 
     Vocabulary.index.refresh()
diff --git a/site/tests/fixtures/vocabularies/subjects.py b/site/tests/fixtures/vocabularies/subjects.py
index 1661cf54a..c96baf10a 100644
--- a/site/tests/fixtures/vocabularies/subjects.py
+++ b/site/tests/fixtures/vocabularies/subjects.py
@@ -187,11 +187,6 @@ def subjects_service(app):
         "scheme": "FAST-topical",
         "subject": "East Asian literature",
     },
-    {
-        "id": "http://id.worldcat.org/fast/1047055",
-        "scheme": "FAST-topical",
-        "subject": "Oral history",
-    },
     {
         "id": "http://id.worldcat.org/fast/1710945",
         "scheme": "FAST-topical",
diff --git a/site/tests/helpers/sample_files/sample.csv b/site/tests/helpers/sample_files/sample.csv
new file mode 100644
index 000000000..2041c1594
--- /dev/null
+++ b/site/tests/helpers/sample_files/sample.csv
@@ -0,0 +1,101 @@
+Index,Customer Id,First Name,Last Name,Company,City,Country,Phone 1,Phone 2,Email,Subscription Date,Website
+1,DD37Cf93aecA6Dc,Sheryl,Baxter,Rasmussen Group,East Leonard,Chile,229.077.5154,397.884.0519x718,zunigavanessa@smith.info,2020-08-24,http://www.stephenson.com/
+2,1Ef7b82A4CAAD10,Preston,Lozano,Vega-Gentry,East Jimmychester,Djibouti,5153435776,686-620-1820x944,vmata@colon.com,2021-04-23,http://www.hobbs.com/
+3,6F94879bDAfE5a6,Roy,Berry,Murillo-Perry,Isabelborough,Antigua and Barbuda,+1-539-402-0259,(496)978-3969x58947,beckycarr@hogan.com,2020-03-25,http://www.lawrence.com/
+4,5Cef8BFA16c5e3c,Linda,Olsen,"Dominguez, Mcmillan and Donovan",Bensonview,Dominican Republic,001-808-617-6467x12895,+1-813-324-8756,stanleyblackwell@benson.org,2020-06-02,http://www.good-lyons.com/
+5,053d585Ab6b3159,Joanna,Bender,"Martin, Lang and Andrade",West Priscilla,Slovakia (Slovak Republic),001-234-203-0635x76146,001-199-446-3860x3486,colinalvarado@miles.net,2021-04-17,https://goodwin-ingram.com/
+6,2d08FB17EE273F4,Aimee,Downs,Steele Group,Chavezborough,Bosnia and Herzegovina,(283)437-3886x88321,999-728-1637,louis27@gilbert.com,2020-02-25,http://www.berger.net/
+7,EA4d384DfDbBf77,Darren,Peck,"Lester, Woodard and Mitchell",Lake Ana,Pitcairn Islands,(496)452-6181x3291,+1-247-266-0963x4995,tgates@cantrell.com,2021-08-24,https://www.le.com/
+8,0e04AFde9f225dE,Brett,Mullen,"Sanford, Davenport and Giles",Kimport,Bulgaria,001-583-352-7197x297,001-333-145-0369,asnow@colon.com,2021-04-12,https://hammond-ramsey.com/
+9,C2dE4dEEc489ae0,Sheryl,Meyers,Browning-Simon,Robersonstad,Cyprus,854-138-4911x5772,+1-448-910-2276x729,mariokhan@ryan-pope.org,2020-01-13,https://www.bullock.net/
+10,8C2811a503C7c5a,Michelle,Gallagher,Beck-Hendrix,Elaineberg,Timor-Leste,739.218.2516x459,001-054-401-0347x617,mdyer@escobar.net,2021-11-08,https://arias.com/
+11,216E205d6eBb815,Carl,Schroeder,"Oconnell, Meza and Everett",Shannonville,Guernsey,637-854-0256x825,114.336.0784x788,kirksalas@webb.com,2021-10-20,https://simmons-hurley.com/
+12,CEDec94deE6d69B,Jenna,Dodson,"Hoffman, Reed and Mcclain",East Andrea,Vietnam,(041)737-3846,+1-556-888-3485x42608,mark42@robbins.com,2020-11-29,http://www.douglas.net/
+13,e35426EbDEceaFF,Tracey,Mata,Graham-Francis,South Joannamouth,Togo,001-949-844-8787,(855)713-8773,alex56@walls.org,2021-12-02,http://www.beck.com/
+14,A08A8aF8BE9FaD4,Kristine,Cox,Carpenter-Cook,Jodyberg,Sri Lanka,786-284-3358x62152,+1-315-627-1796x8074,holdenmiranda@clarke.com,2021-02-08,https://www.brandt.com/
+15,6fEaA1b7cab7B6C,Faith,Lutz,Carter-Hancock,Burchbury,Singapore,(781)861-7180x8306,207-185-3665,cassieparrish@blevins-chapman.net,2022-01-26,http://stevenson.org/
+16,8cad0b4CBceaeec,Miranda,Beasley,Singleton and Sons,Desireeshire,Oman,540.085.3135x185,+1-600-462-6432x21881,vduncan@parks-hardy.com,2022-04-12,http://acosta.org/
+17,a5DC21AE3a21eaA,Caroline,Foley,Winters-Mendoza,West Adriennestad,Western Sahara,936.222.4746x9924,001-469-948-6341x359,holtgwendolyn@watson-davenport.com,2021-03-10,http://www.benson-roth.com/
+18,F8Aa9d6DfcBeeF8,Greg,Mata,Valentine LLC,Lake Leslie,Mozambique,(701)087-2415,(195)156-1861x26241,jaredjuarez@carroll.org,2022-03-26,http://pitts-cherry.com/
+19,F160f5Db3EfE973,Clifford,Jacobson,Simon LLC,Harmonview,South Georgia and the South Sandwich Islands,001-151-330-3524x0469,(748)477-7174,joseph26@jacobson.com,2020-09-24,https://mcconnell.com/
+20,0F60FF3DdCd7aB0,Joanna,Kirk,Mays-Mccormick,Jamesshire,French Polynesia,(266)131-7001x711,(283)312-5579x11543,tuckerangie@salazar.net,2021-09-24,https://www.camacho.net/
+21,9F9AdB7B8A6f7F2,Maxwell,Frye,Patterson Inc,East Carly,Malta,423.262.3059,202-880-0688x7491,fgibson@drake-webb.com,2022-01-12,http://www.roberts.com/
+22,FBd0Ded4F02a742,Kiara,Houston,"Manning, Hester and Arroyo",South Alvin,Netherlands,001-274-040-3582x10611,+1-528-175-0973x4684,blanchardbob@wallace-shannon.com,2020-09-15,https://www.reid-potts.com/
+23,2FB0FAA1d429421,Colleen,Howard,Greer and Sons,Brittanyview,Paraguay,1935085151,(947)115-7711x5488,rsingleton@ryan-cherry.com,2020-08-19,http://paul.biz/
+24,010468dAA11382c,Janet,Valenzuela,Watts-Donaldson,Veronicamouth,Lao People's Democratic Republic,354.259.5062x7538,500.433.2022,stefanie71@spence.com,2020-09-08,https://moreno.biz/
+25,eC1927Ca84E033e,Shane,Wilcox,Tucker LLC,Bryanville,Albania,(429)005-9030x11004,541-116-4501,mariah88@santos.com,2021-04-06,https://www.ramos.com/
+26,09D7D7C8Fe09aea,Marcus,Moody,Giles Ltd,Kaitlyntown,Panama,674-677-8623,909-277-5485x566,donnamullins@norris-barrett.org,2022-05-24,https://www.curry.com/
+27,aBdfcF2c50b0bfD,Dakota,Poole,Simmons Group,Michealshire,Belarus,(371)987-8576x4720,071-152-1376,stacey67@fields.org,2022-02-20,https://sanford-wilcox.biz/
+28,b92EBfdF8a3f0E6,Frederick,Harper,"Hinton, Chaney and Stokes",South Marissatown,Switzerland,+1-077-121-1558x0687,264.742.7149,jacobkhan@bright.biz,2022-05-26,https://callahan.org/
+29,3B5dAAFA41AFa22,Stefanie,Fitzpatrick,Santana-Duran,Acevedoville,Saint Vincent and the Grenadines,(752)776-3286,+1-472-021-4814x85074,wterrell@clark.com,2020-07-30,https://meyers.com/
+30,EDA69ca7a6e96a2,Kent,Bradshaw,Sawyer PLC,North Harold,Tanzania,+1-472-143-5037x884,126.922.6153,qjimenez@boyd.com,2020-04-26,http://maynard-ho.com/
+31,64DCcDFaB9DFd4e,Jack,Tate,"Acosta, Petersen and Morrow",West Samuel,Zimbabwe,965-108-4406x20714,046.906.1442x6784,gfigueroa@boone-zavala.com,2021-09-15,http://www.hawkins-ramsey.com/
+32,679c6c83DD872d6,Tom,Trujillo,Mcgee Group,Cunninghamborough,Denmark,416-338-3758,(775)890-7209,tapiagreg@beard.info,2022-01-13,http://www.daniels-klein.com/
+33,7Ce381e4Afa4ba9,Gabriel,Mejia,Adkins-Salinas,Port Annatown,Liechtenstein,4077245425,646.044.0696x66800,coleolson@jennings.net,2021-04-24,https://patel-hanson.info/
+34,A09AEc6E3bF70eE,Kaitlyn,Santana,Herrera Group,New Kaitlyn,United States of America,6303643286,447-710-6202x07313,georgeross@miles.org,2021-09-21,http://pham.com/
+35,aA9BAFfBc3710fe,Faith,Moon,"Waters, Chase and Aguilar",West Marthaburgh,Bahamas,+1-586-217-0359x6317,+1-818-199-1403,willistonya@randolph-baker.com,2021-11-03,https://spencer-charles.info/
+36,E11dfb2DB8C9f72,Tammie,Haley,"Palmer, Barnes and Houston",East Teresa,Belize,001-276-734-4113x6087,(430)300-8770,harrisisaiah@jenkins.com,2022-01-04,http://evans-simon.com/
+37,889eCf90f68c5Da,Nicholas,Sosa,Jordan Ltd,South Hunter,Uruguay,(661)425-6042,975-998-1519,fwolfe@dorsey.com,2021-08-10,https://www.fleming-richards.com/
+38,7a1Ee69F4fF4B4D,Jordan,Gay,Glover and Sons,South Walter,Solomon Islands,7208417020,8035336772,tiffanydavies@harris-mcfarland.org,2021-02-24,http://www.lee.org/
+39,dca4f1D0A0fc5c9,Bruce,Esparza,Huerta-Mclean,Poolefurt,Montenegro,559-529-4424,001-625-000-7132x0367,preese@frye-vega.com,2021-10-22,http://www.farley.org/
+40,17aD8e2dB3df03D,Sherry,Garza,Anderson Ltd,West John,Poland,001-067-713-6440x158,(978)289-8785x5766,ann48@miller.com,2021-11-01,http://spence.com/
+41,2f79Cd309624Abb,Natalie,Gentry,Monroe PLC,West Darius,Dominican Republic,830.996.8238,499.122.5415,tcummings@fitzpatrick-ashley.com,2020-10-10,http://www.dorsey.biz/
+42,6e5ad5a5e2bB5Ca,Bryan,Dunn,Kaufman and Sons,North Jimstad,Burkina Faso,001-710-802-5565,078.699.8982x13881,woodwardandres@phelps.com,2021-09-08,http://www.butler.com/
+43,7E441b6B228DBcA,Wayne,Simpson,Perkins-Trevino,East Rebekahborough,Bolivia,(344)156-8632x1869,463-445-3702x38463,barbarapittman@holder.com,2020-12-13,https://gillespie-holder.com/
+44,D3fC11A9C235Dc6,Luis,Greer,Cross PLC,North Drew,Bulgaria,001-336-025-6849x701,684.698.2911x6092,bstuart@williamson-mcclure.com,2022-05-15,https://fletcher-nielsen.com/
+45,30Dfa48fe5Ede78,Rhonda,Frost,"Herrera, Shepherd and Underwood",Lake Lindaburgh,Monaco,(127)081-9339,+1-431-028-3337x3492,zkrueger@wolf-chavez.net,2021-12-06,http://www.khan.com/
+46,fD780ED8dbEae7B,Joanne,Montes,"Price, Sexton and Mcdaniel",Gwendolynview,Palau,(897)726-7952,(467)886-9467x5721,juan80@henson.net,2020-07-01,http://ochoa.com/
+47,300A40d3ce24bBA,Geoffrey,Guzman,Short-Wiggins,Zimmermanland,Uzbekistan,975.235.8921x269,(983)188-6873,bauercrystal@gay.com,2020-04-23,https://decker-kline.com/
+48,283DFCD0Dba40aF,Gloria,Mccall,"Brennan, Acosta and Ramos",North Kerriton,Ghana,445-603-6729,001-395-959-4736x4524,bartlettjenna@zuniga-moss.biz,2022-03-11,http://burgess-frank.com/
+49,F4Fc91fEAEad286,Brady,Cohen,Osborne-Erickson,North Eileenville,United Arab Emirates,741.849.0139x524,+1-028-691-7497x0894,mccalltyrone@durham-rose.biz,2022-03-10,http://hammond-barron.com/
+50,80F33Fd2AcebF05,Latoya,Mccann,"Hobbs, Garrett and Sanford",Port Sergiofort,Belarus,(530)287-4548x29481,162-234-0249x32790,bobhammond@barry.biz,2021-12-02,https://www.burton.com/
+51,Aa20BDe68eAb0e9,Gerald,Hawkins,"Phelps, Forbes and Koch",New Alberttown,Canada,+1-323-239-1456x96168,(092)508-0269,uwarner@steele-arias.com,2021-03-19,https://valenzuela.com/
+52,e898eEB1B9FE22b,Samuel,Crawford,"May, Goodwin and Martin",South Jasmine,Algeria,802-242-7457,626.116.9535x8578,xpittman@ritter-carney.net,2021-03-27,https://guerrero.org/
+53,faCEF517ae7D8eB,Patricia,Goodwin,"Christian, Winters and Ellis",Cowanfort,Swaziland,322.549.7139x70040,(111)741-4173,vaughanchristy@lara.biz,2021-03-08,http://clark.info/
+54,c09952De6Cda8aA,Stacie,Richard,Byrd Inc,New Deborah,Madagascar,001-622-948-3641x24810,001-731-168-2893x8891,clinton85@colon-arias.org,2020-10-15,https://kim.com/
+55,f3BEf3Be028166f,Robin,West,"Nixon, Blackwell and Sosa",Wallstown,Ecuador,698.303.4267,001-683-837-7651x525,greenemiranda@zimmerman.com,2022-01-13,https://www.mora.com/
+56,C6F2Fc6a7948a4e,Ralph,Haas,Montes PLC,Lake Ellenchester,Palestinian Territory,2239271999,001-962-434-0867x649,goodmancesar@figueroa.biz,2020-05-25,http://may.com/
+57,c8FE57cBBdCDcb2,Phyllis,Maldonado,Costa PLC,Lake Whitney,Saint Barthelemy,4500370767,001-508-064-6725x017,yhanson@warner-diaz.org,2021-01-25,http://www.bernard.com/
+58,B5acdFC982124F2,Danny,Parrish,Novak LLC,East Jaredbury,United Arab Emirates,(669)384-8597x8794,506.731.5952x571,howelldarren@house-cohen.com,2021-03-17,http://www.parsons-hudson.com/
+59,8c7DdF10798bCC3,Kathy,Hill,"Moore, Mccoy and Glass",Selenabury,South Georgia and the South Sandwich Islands,001-171-716-2175x310,888.625.0654,ncamacho@boone-simmons.org,2020-11-15,http://hayden.com/
+60,C681dDd0cc422f7,Kelli,Hardy,Petty Ltd,Huangfort,Sao Tome and Principe,020.324.2191x2022,424-157-8216,kristopher62@oliver.com,2020-12-20,http://www.kidd.com/
+61,a940cE42e035F28,Lynn,Pham,"Brennan, Camacho and Tapia",East Pennyshire,Portugal,846.468.6834x611,001-248-691-0006,mpham@rios-guzman.com,2020-08-21,https://www.murphy.com/
+62,9Cf5E6AFE0aeBfd,Shelley,Harris,"Prince, Malone and Pugh",Port Jasminborough,Togo,423.098.0315x8373,+1-386-458-8944x15194,zachary96@mitchell-bryant.org,2020-12-10,https://www.ryan.com/
+63,aEcbe5365BbC67D,Eddie,Jimenez,Caldwell Group,West Kristine,Ethiopia,+1-235-657-1073x6306,(026)401-7353x2417,kristiwhitney@bernard.com,2022-03-24,http://cherry.com/
+64,FCBdfCEAe20A8Dc,Chloe,Hutchinson,Simon LLC,South Julia,Netherlands,981-544-9452,+1-288-552-4666x060,leah85@sutton-terrell.com,2022-05-15,https://mitchell.info/
+65,636cBF0835E10ff,Eileen,Lynch,"Knight, Abbott and Hubbard",Helenborough,Liberia,+1-158-951-4131x53578,001-673-779-6713x680,levigiles@vincent.com,2021-01-02,http://mckay.com/
+66,fF1b6c9E8Fbf1ff,Fernando,Lambert,Church-Banks,Lake Nancy,Lithuania,497.829.9038,3863743398,fisherlinda@schaefer.net,2021-04-23,https://www.vang.com/
+67,2A13F74EAa7DA6c,Makayla,Cannon,Henderson Inc,Georgeport,New Caledonia,001-215-801-6392x46009,027-609-6460,scottcurtis@hurley.biz,2020-01-20,http://www.velazquez.net/
+68,a014Ec1b9FccC1E,Tom,Alvarado,Donaldson-Dougherty,South Sophiaberg,Kiribati,(585)606-2980x2258,730-797-3594x5614,nicholsonnina@montgomery.info,2020-08-18,http://odom-massey.com/
+69,421a109cABDf5fa,Virginia,Dudley,Warren Ltd,Hartbury,French Southern Territories,027.846.3705x14184,+1-439-171-1846x4636,zvalencia@phelps.com,2021-01-31,http://hunter-esparza.com/
+70,CC68FD1D3Bbbf22,Riley,Good,Wade PLC,Erikaville,Canada,6977745822,855-436-7641,alex06@galloway.com,2020-02-03,http://conway.org/
+71,CBCd2Ac8E3eBDF9,Alexandria,Buck,Keller-Coffey,Nicolasfort,Iran,078-900-4760x76668,414-112-8700x68751,lee48@manning.com,2021-02-20,https://ramsey.org/
+72,Ef859092FbEcC07,Richard,Roth,Conway-Mcbride,New Jasmineshire,Morocco,581-440-6539,9857827463,aharper@maddox-townsend.org,2020-02-23,https://www.brooks.com/
+73,F560f2d3cDFb618,Candice,Keller,Huynh and Sons,East Summerstad,Zimbabwe,001-927-965-8550x92406,001-243-038-4271x53076,buckleycory@odonnell.net,2020-08-22,https://www.lucero.com/
+74,A3F76Be153Df4a3,Anita,Benson,Parrish Ltd,Skinnerport,Russian Federation,874.617.5668x69878,(399)820-6418x0071,angie04@oconnell.com,2020-02-09,http://oconnor.com/
+75,D01Af0AF7cBbFeA,Regina,Stein,Guzman-Brown,Raystad,Solomon Islands,001-469-848-0724x4407,001-085-360-4426x00357,zrosario@rojas-hardin.net,2022-01-15,http://www.johnston.info/
+76,d40e89dCade7b2F,Debra,Riddle,"Chang, Aguirre and Leblanc",Colinhaven,United States Virgin Islands,+1-768-182-6014x14336,(303)961-4491,shieldskerry@robles.com,2020-07-11,http://kaiser.info/
+77,BF6a1f9bd1bf8DE,Brittany,Zuniga,Mason-Hester,West Reginald,Kyrgyz Republic,(050)136-9025,001-480-851-2496x0157,mchandler@cochran-huerta.org,2021-07-24,http://www.boyle.com/
+78,FfaeFFbbbf280db,Cassidy,Mcmahon,"Mcguire, Huynh and Hopkins",Lake Sherryborough,Myanmar,5040771311,684-682-0021x1326,katrinalane@fitzgerald.com,2020-10-21,https://hurst.com/
+79,CbAE1d1e9a8dCb1,Laurie,Pennington,"Sanchez, Marsh and Hale",Port Katherineville,Dominica,007.155.3406x553,+1-809-862-5566x277,cookejill@powell.com,2020-06-08,http://www.hebert.com/
+80,A7F85c1DE4dB87f,Alejandro,Blair,"Combs, Waller and Durham",Thomasland,Iceland,(690)068-4641x51468,555.509.8691x2329,elizabethbarr@ewing.com,2020-09-19,https://mercado-blevins.com/
+81,D6CEAfb3BDbaa1A,Leslie,Jennings,Blankenship-Arias,Coreybury,Micronesia,629.198.6346,075.256.0829,corey75@wiggins.com,2021-11-13,https://www.juarez.com/
+82,Ebdb6F6F7c90b69,Kathleen,Mckay,"Coffey, Lamb and Johnson",Lake Janiceton,Saint Vincent and the Grenadines,(733)910-9968,(691)247-4128x0665,chloelester@higgins-wilkinson.com,2021-09-12,http://www.owens-mooney.com/
+83,E8E7e8Cfe516ef0,Hunter,Moreno,Fitzpatrick-Lawrence,East Clinton,Isle of Man,(733)833-6754,001-761-013-7121,isaac26@benton-finley.com,2020-12-28,http://walls.info/
+84,78C06E9b6B3DF20,Chad,Davidson,Garcia-Jimenez,South Joshuashire,Oman,8275702958,(804)842-4715,justinwalters@jimenez.com,2021-11-15,http://www.garner-oliver.com/
+85,03A1E62ADdeb31c,Corey,Holt,"Mcdonald, Bird and Ramirez",New Glenda,Fiji,001-439-242-4986x7918,3162708934,maurice46@morgan.com,2020-02-18,http://www.watson.com/
+86,C6763c99d0bd16D,Emma,Cunningham,Stephens Inc,North Jillianview,New Zealand,128-059-0206x60217,(312)164-4545x2284,walter83@juarez.org,2022-05-13,http://www.reid.info/
+87,ebe77E5Bf9476CE,Duane,Woods,Montoya-Miller,Lyonsberg,Maldives,(636)544-7783x7288,(203)287-1003x5932,kmercer@wagner.com,2020-07-21,http://murray.org/
+88,E4Bbcd8AD81fC5f,Alison,Vargas,"Vaughn, Watts and Leach",East Cristinabury,Benin,365-273-8144,053-308-7653x6287,vcantu@norton.com,2020-11-10,http://mason.info/
+89,efeb73245CDf1fF,Vernon,Kane,Carter-Strickland,Thomasfurt,Yemen,114-854-1159x555,499-608-4612,hilljesse@barrett.info,2021-04-15,http://www.duffy-hensley.net/
+90,37Ec4B395641c1E,Lori,Flowers,Decker-Mcknight,North Joeburgh,Namibia,679.415.1210,945-842-3659x4581,tyrone77@valenzuela.info,2021-01-09,http://www.deleon-crosby.com/
+91,5ef6d3eefdD43bE,Nina,Chavez,Byrd-Campbell,Cassidychester,Bhutan,053-344-3205,+1-330-920-5422x571,elliserica@frank.com,2020-03-26,https://www.pugh.com/
+92,98b3aeDcC3B9FF3,Shane,Foley,Rocha-Hart,South Dannymouth,Hungary,+1-822-569-0302,001-626-114-5844x55073,nsteele@sparks.com,2021-07-06,https://www.holt-sparks.com/
+93,aAb6AFc7AfD0fF3,Collin,Ayers,Lamb-Peterson,South Lonnie,Anguilla,404-645-5351x012,001-257-582-8850x8516,dudleyemily@gonzales.biz,2021-06-29,http://www.ruiz.com/
+94,54B5B5Fe9F1B6C5,Sherry,Young,"Lee, Lucero and Johnson",Frankchester,Solomon Islands,158-687-1764,(438)375-6207x003,alan79@gates-mclaughlin.com,2021-04-04,https://travis.net/
+95,BE91A0bdcA49Bbc,Darrell,Douglas,"Newton, Petersen and Mathis",Daisyborough,Mali,001-084-845-9524x1777,001-769-564-6303,grayjean@lowery-good.com,2022-02-17,https://banks.biz/
+96,cb8E23e48d22Eae,Karl,Greer,Carey LLC,East Richard,Guyana,(188)169-1674x58692,001-841-293-3519x614,hhart@jensen.com,2022-01-30,http://hayes-perez.com/
+97,CeD220bdAaCfaDf,Lynn,Atkinson,"Ware, Burns and Oneal",New Bradview,Sri Lanka,+1-846-706-2218,605.413.3198,vkemp@ferrell.com,2021-07-10,https://novak-allison.com/
+98,28CDbC0dFe4b1Db,Fred,Guerra,Schmitt-Jones,Ortegaland,Solomon Islands,+1-753-067-8419x7170,+1-632-666-7507x92121,swagner@kane.org,2021-09-18,https://www.ross.com/
+99,c23d1D9EE8DEB0A,Yvonne,Farmer,Fitzgerald-Harrell,Lake Elijahview,Aruba,(530)311-9786,001-869-452-0943x12424,mccarthystephen@horn-green.biz,2021-08-11,http://watkins.info/
+100,2354a0E336A91A1,Clarence,Haynes,"Le, Nash and Cross",Judymouth,Honduras,(753)813-6941,783.639.1472,colleen91@faulkner.biz,2020-03-11,http://www.hatfield-saunders.net/
diff --git a/site/tests/helpers/sample_files/sample.jpg b/site/tests/helpers/sample_files/sample.jpg
new file mode 100644
index 000000000..04ef15be2
Binary files /dev/null and b/site/tests/helpers/sample_files/sample.jpg differ
diff --git a/site/tests/helpers/sample_files/sample2.pdf b/site/tests/helpers/sample_files/sample2.pdf
new file mode 100644
index 000000000..774c2ea70
Binary files /dev/null and b/site/tests/helpers/sample_files/sample2.pdf differ
diff --git a/site/tests/helpers/sample_records/__init__.py b/site/tests/helpers/sample_records/__init__.py
index 0a9d00db2..01a01c830 100644
--- a/site/tests/helpers/sample_records/__init__.py
+++ b/site/tests/helpers/sample_records/__init__.py
@@ -1,13 +1,17 @@
 from .sample_metadata_presentation_pdf import sample_metadata_presentation_pdf
-from .sample11451 import rec11451
-from .sample16079 import rec16079
-from .sample22625 import rec22625
-from .sample22647 import rec22647
-from .sample28491 import rec28491
-from .sample33383 import rec33383
-from .sample34031 import rec34031
-from .sample38367 import rec38367
-from .sample42615 import rec42615
-from .sample44881 import rec44881
-from .sample45177 import rec45177
-from .sample48799 import rec48799
+from .sample_metadata_chapter_pdf import sample_metadata_chapter_pdf
+from .sample_metadata_conference_proceedings_pdf import (
+    sample_metadata_conference_proceedings_pdf,
+)
+from .sample_metadata_chapter2_pdf import sample_metadata_chapter2_pdf
+from .sample_metadata_chapter3_pdf import sample_metadata_chapter3_pdf
+from .sample_metadata_book_pdf import sample_metadata_book_pdf
+from .sample_metadata_journal_article_pdf import sample_metadata_journal_article_pdf
+from .sample_metadata_chapter4_pdf import sample_metadata_chapter4_pdf
+from .sample_metadata_thesis_pdf import sample_metadata_thesis_pdf
+from .sample_metadata_journal_article2_pdf import sample_metadata_journal_article2_pdf
+from .sample_metadata_interview_transcript_pdf import (
+    sample_metadata_interview_transcript_pdf,
+)
+from .sample_metadata_chapter5_pdf import sample_metadata_chapter5_pdf
+from .sample_metadata_white_paper_pdf import sample_metadata_white_paper_pdf
diff --git a/site/tests/helpers/sample_records/sample28491.py b/site/tests/helpers/sample_records/sample_metadata_book_pdf.py
similarity index 97%
rename from site/tests/helpers/sample_records/sample28491.py
rename to site/tests/helpers/sample_records/sample_metadata_book_pdf.py
index 74b468431..e458ef74e 100644
--- a/site/tests/helpers/sample_records/sample28491.py
+++ b/site/tests/helpers/sample_records/sample_metadata_book_pdf.py
@@ -1,4 +1,4 @@
-rec28491 = {
+sample_metadata_book_pdf = {
     "input": {
         "created": "2020-01-30T16:46:54Z",
         "custom_fields": {
@@ -18,9 +18,7 @@
                 },
                 {
                     "group_identifier": "1003089",
-                    "group_name": (
-                        "Foreign Language Teaching and the Environment"
-                    ),
+                    "group_name": ("Foreign Language Teaching and the Environment"),
                 },
                 {
                     "group_identifier": "1003408",
@@ -203,8 +201,7 @@
             "publication_date": "2008",
             "resource_type": {"id": "textDocument-book"},
             "publisher": (
-                "Editorial ACRIBIA, S. A., Apartado 466, 50080, "
-                "Zaragoza, Espana."
+                "Editorial ACRIBIA, S. A., Apartado 466, 50080, " "Zaragoza, Espana."
             ),
             "rights": [
                 {
@@ -218,9 +215,7 @@
                     "id": "arr",
                     "icon": "copyright",
                     "props": {
-                        "url": (
-                            "https://en.wikipedia.org/wiki/All_rights_reserved"
-                        )
+                        "url": ("https://en.wikipedia.org/wiki/All_rights_reserved")
                     },
                     "title": {"en": "All Rights Reserved"},
                 }
@@ -272,9 +267,7 @@
                 },
                 {
                     "group_identifier": "1003089",
-                    "group_name": (
-                        "Foreign Language Teaching and the Environment"
-                    ),
+                    "group_name": ("Foreign Language Teaching and the Environment"),
                 },
                 {
                     "group_identifier": "1003408",
@@ -423,8 +416,7 @@
             "publication_date": "2008",
             "resource_type": {"id": "textDocument-book"},
             "publisher": (
-                "Editorial ACRIBIA, S. A., Apartado 466, 50080, "
-                "Zaragoza, Espana."
+                "Editorial ACRIBIA, S. A., Apartado 466, 50080, " "Zaragoza, Espana."
             ),
             "rights": [{"id": "arr"}],
             "subjects": [
@@ -474,9 +466,7 @@
                 },
                 {
                     "group_identifier": "1003089",
-                    "group_name": (
-                        "Foreign Language Teaching and the Environment"
-                    ),
+                    "group_name": ("Foreign Language Teaching and the Environment"),
                 },
                 {
                     "group_identifier": "1003408",
@@ -668,8 +658,7 @@
                 "title": {"en": "Book"},
             },
             "publisher": (
-                "Editorial ACRIBIA, S. A., Apartado 466, 50080, "
-                "Zaragoza, Espana."
+                "Editorial ACRIBIA, S. A., Apartado 466, 50080, " "Zaragoza, Espana."
             ),
             "rights": [
                 {
diff --git a/site/tests/helpers/sample_records/sample22625.py b/site/tests/helpers/sample_records/sample_metadata_chapter2_pdf.py
similarity index 95%
rename from site/tests/helpers/sample_records/sample22625.py
rename to site/tests/helpers/sample_records/sample_metadata_chapter2_pdf.py
index db0493ed1..ce5d51554 100644
--- a/site/tests/helpers/sample_records/sample22625.py
+++ b/site/tests/helpers/sample_records/sample_metadata_chapter2_pdf.py
@@ -1,6 +1,6 @@
-rec22625 = {
+sample_metadata_chapter2_pdf = {
     "input": {
-        "created": "2019-01-29T03:57:00Z",
+        # "created": "2019-01-29T03:57:00Z",
         "pids": {
             "doi": {
                 "client": "datacite",
@@ -36,21 +36,21 @@
                 "pages": "57-74",
                 "title": "Habad Hasidism: History, Thought, Image",
             },
-            "hclegacy:groups_for_deposit": [
-                {
-                    "group_identifier": "1000610",
-                    "group_name": "Jewish Mysticism",
-                },
-                {
-                    "group_identifier": "1000611",
-                    "group_name": "Modern Jewish Thought and Theology",
-                },
-            ],
+            # "hclegacy:groups_for_deposit": [
+            #     {
+            #         "group_identifier": "1000610",
+            #         "group_name": "Jewish Mysticism",
+            #     },
+            #     {
+            #         "group_identifier": "1000611",
+            #         "group_name": "Modern Jewish Thought and Theology",
+            #     },
+            # ],
             "hclegacy:record_change_date": "2019-01-29T03:57:00Z",
             "hclegacy:record_creation_date": "2019-01-29T03:57:00Z",
         },
         "files": {
-            "default_preview": "55710426.pdf",
+            # "default_preview": "55710426.pdf",
             "enabled": True,
             "entries": {
                 "55710426.pdf": {
@@ -65,7 +65,8 @@
             #     [{'description': ('The issue of gender has been a topic of '
             #                      'discussion in the research of Hasidism since S. '
             #                      'A. Horodecky’s book (1923), in which he '
-            #                      'claimed that Hasidism brought about full ' 'equality of Jewish men and women in the field '
+            #                      'claimed that Hasidism brought about full '
+            #                      'equality of Jewish men and women in the field '
             #                      'of spirituality. Although his claims have been '
             #                      'by and large rejected, most\nscholars agree that '
             #                      'the twentieth century Chabad movement has '
@@ -216,8 +217,19 @@
             "publisher": "Zalman Shazar Center",
             "title": "מגדר וזמן בכתבי ר׳ שניאור זלמן מלאדי",
         },
-        "parent": {"access": {"owned_by": [{"user": "1017065"}]}},
-        "updated": "2019-01-29T03:57:00Z",
+        "parent": {
+            "access": {
+                "owned_by": [
+                    {
+                        "email": "test@example.com",
+                        "identifiers": [
+                            {"identifier": "1017065", "scheme": "neh_user_id"},
+                        ],
+                    }
+                ]
+            }
+        },
+        # "updated": "2019-01-29T03:57:00Z",
     },
     "expected_serialized": {
         "record_source": "knowledgeCommons",
@@ -382,8 +394,15 @@
             "publisher": "Zalman Shazar Center",
             "title": "מגדר וזמן בכתבי ר׳ שניאור זלמן מלאדי",
         },
-        "parent": {"access": {"owned_by": [{"user": "1017065"}]}},
-        "updated": "2019-01-29T03:57:00Z",
+        "parent": {
+            "access": {
+                "owned_by": [
+                    {"identifier": "1017065", "scheme": "neh_user_id"},
+                    {"email": "test@example.com"},
+                ]
+            }
+        },
+        # "updated": "2019-01-29T03:57:00Z",
     },
     "expected_loaded": {
         "record_source": "knowledgeCommons",
diff --git a/site/tests/helpers/sample_records/sample22647.py b/site/tests/helpers/sample_records/sample_metadata_chapter3_pdf.py
similarity index 99%
rename from site/tests/helpers/sample_records/sample22647.py
rename to site/tests/helpers/sample_records/sample_metadata_chapter3_pdf.py
index 408530194..41b1d2440 100644
--- a/site/tests/helpers/sample_records/sample22647.py
+++ b/site/tests/helpers/sample_records/sample_metadata_chapter3_pdf.py
@@ -1,4 +1,4 @@
-rec22647 = {
+sample_metadata_chapter3_pdf = {
     "input": {
         "created": "2019-02-01T19:30:52Z",
         "pids": {
@@ -836,9 +836,7 @@
                 },
             ],
             "publisher": "Oxford University Press",
-            "title": (
-                "Unfixing Epic: Homeric Orality and Contemporary Performance"
-            ),
+            "title": ("Unfixing Epic: Homeric Orality and Contemporary Performance"),
         },
         "parent": {"access": {"owned_by": [{"user": "1012453"}]}},
         "updated": "2019-02-01T19:30:52Z",
diff --git a/site/tests/helpers/sample_records/sample34031.py b/site/tests/helpers/sample_records/sample_metadata_chapter4_pdf.py
similarity index 98%
rename from site/tests/helpers/sample_records/sample34031.py
rename to site/tests/helpers/sample_records/sample_metadata_chapter4_pdf.py
index c7ec8dceb..7540720f5 100644
--- a/site/tests/helpers/sample_records/sample34031.py
+++ b/site/tests/helpers/sample_records/sample_metadata_chapter4_pdf.py
@@ -1,4 +1,4 @@
-rec34031 = {
+sample_metadata_chapter4_pdf = {
     "input": {
         "created": "2021-01-11T23:48:41Z",
         "pids": {
@@ -46,8 +46,7 @@
             "entries": {
                 "gnosticism-theorized-major-trends-and-approaches-dillon.pdf": {  # noqa: E501
                     "key": (
-                        "gnosticism-theorized-major-trends-and-approaches-"
-                        "dillon.pdf"
+                        "gnosticism-theorized-major-trends-and-approaches-" "dillon.pdf"
                     ),
                     "mimetype": "application/pdf",
                     "size": "17181",
@@ -123,9 +122,7 @@
                     "id": "arr",
                     "icon": "copyright",
                     "props": {
-                        "url": (
-                            "https://en.wikipedia.org/wiki/All_rights_reserved"
-                        )
+                        "url": ("https://en.wikipedia.org/wiki/All_rights_reserved")
                     },
                     "title": {"en": "All Rights Reserved"},
                 }
@@ -208,8 +205,7 @@
             "entries": {
                 "gnosticism-theorized-major-trends-and-approaches-dillon.pdf": {  # noqa: E501
                     "key": (
-                        "gnosticism-theorized-major-trends-and-approaches-"
-                        "dillon.pdf"
+                        "gnosticism-theorized-major-trends-and-approaches-" "dillon.pdf"
                     ),
                     "mimetype": "application/pdf",
                     "size": "17181",
@@ -352,8 +348,7 @@
             "entries": {
                 "gnosticism-theorized-major-trends-and-approaches-dillon.pdf": {  # noqa: E501
                     "key": (
-                        "gnosticism-theorized-major-trends-and-approaches-"
-                        "dillon.pdf"
+                        "gnosticism-theorized-major-trends-and-approaches-" "dillon.pdf"
                     ),
                     "mimetype": "application/pdf",
                     "size": "17181",
diff --git a/site/tests/helpers/sample_records/sample45177.py b/site/tests/helpers/sample_records/sample_metadata_chapter5_pdf.py
similarity index 98%
rename from site/tests/helpers/sample_records/sample45177.py
rename to site/tests/helpers/sample_records/sample_metadata_chapter5_pdf.py
index 65e6d12a0..0d2867e77 100644
--- a/site/tests/helpers/sample_records/sample45177.py
+++ b/site/tests/helpers/sample_records/sample_metadata_chapter5_pdf.py
@@ -1,4 +1,4 @@
-rec45177 = {
+sample_metadata_chapter5_pdf = {
     "input": {
         "created": "2022-03-31T13:53:16Z",
         "pids": {
@@ -231,9 +231,7 @@
             "contributors": [
                 {
                     "person_or_org": {
-                        "family_name": (
-                            "ARLIS/NA Cataloging Advisory Committee"
-                        ),
+                        "family_name": ("ARLIS/NA Cataloging Advisory Committee"),
                         "given_name": "",
                         "name": "ARLIS/NA Cataloging Advisory Committee",
                         "type": "personal",
@@ -528,9 +526,7 @@
             "contributors": [
                 {
                     "person_or_org": {
-                        "family_name": (
-                            "ARLIS/NA Cataloging Advisory Committee"
-                        ),
+                        "family_name": ("ARLIS/NA Cataloging Advisory Committee"),
                         "given_name": "",
                         "name": "ARLIS/NA Cataloging Advisory Committee",
                         "type": "personal",
@@ -812,9 +808,7 @@
             "contributors": [
                 {
                     "person_or_org": {
-                        "family_name": (
-                            "ARLIS/NA Cataloging Advisory Committee"
-                        ),
+                        "family_name": ("ARLIS/NA Cataloging Advisory Committee"),
                         "given_name": "",
                         "name": "ARLIS/NA Cataloging Advisory Committee",
                         "type": "personal",
diff --git a/site/tests/helpers/sample_records/sample11451.py b/site/tests/helpers/sample_records/sample_metadata_chapter_pdf.py
similarity index 80%
rename from site/tests/helpers/sample_records/sample11451.py
rename to site/tests/helpers/sample_records/sample_metadata_chapter_pdf.py
index 0d13129c0..1a57039ec 100644
--- a/site/tests/helpers/sample_records/sample11451.py
+++ b/site/tests/helpers/sample_records/sample_metadata_chapter_pdf.py
@@ -1,13 +1,7 @@
-rec11451 = {
+sample_metadata_chapter_pdf = {
     "input": {
-        "created": "2017-03-08T05:01:44Z",
-        "pids": {
-            "doi": {
-                "client": "datacite",
-                "identifier": "10.17613/M6733G",
-                "provider": "datacite",
-            }
-        },
+        # "created": "2017-03-08T05:01:44Z",
+        "pids": {},
         "custom_fields": {
             "hclegacy:collection": "hccollection:1",
             "hclegacy:file_location": (
@@ -16,27 +10,12 @@
                 "female_agency_ophelia.pdf"
             ),
             "hclegacy:file_pid": "hc:11452",
-            "hclegacy:groups_for_deposit": [
-                {
-                    "group_identifier": "174",
-                    "group_name": (
-                        "East Asian Languages and Literatures after 1900"
-                    ),
-                },
-                {
-                    "group_identifier": "246",
-                    "group_name": "Global Shakespeares",
-                },
-                {
-                    "group_identifier": "97",
-                    "group_name": "GS Drama and Performance",
-                },
-                {"group_identifier": "25", "group_name": "LLC Shakespeare"},
-                {
-                    "group_identifier": "91",
-                    "group_name": "TC Translation Studies",
-                },
-            ],
+            # "hclegacy:groups_for_deposit": [
+            #     {
+            #         "group_identifier": "174",
+            #         "group_name": ("East Asian Languages and Literatures after 1900"),
+            #     },
+            # ],
             "hclegacy:submitter_id": "49",
             "hclegacy:previously_published": "published",
             "hclegacy:record_change_date": "2017-09-16T16:41:52Z",
@@ -63,7 +42,7 @@
             ],
         },
         "files": {
-            "default_preview": "female_agency_ophelia.pdf",
+            # "default_preview": "female_agency_ophelia.pdf",
             "enabled": True,
             "entries": {
                 "female_agency_ophelia.pdf": {
@@ -74,133 +53,80 @@
             },
         },
         "metadata": {
-            "additional_descriptions": [
-                {
-                    "description": (
-                        "There are three "
-                        "main East Asian "
-                        "approaches to "
-                        "interpreting "
-                        "Ophelia. The first "
-                        "is informed by the "
-                        "fascination with "
-                        "and reaction "
-                        "against the "
-                        "Victorian "
-                        "pictorialization "
-                        "of Ophelia, "
-                        "especially John "
-                        "Everett Millais’s "
-                        "famous Ophelia "
-                        "(1851), that "
-                        "emphasized, as "
-                        "Kimberly Rhodes "
-                        "describes, her "
-                        "“pathos, "
-                        "innocence, and "
-                        "beauty rather than "
-                        "the unseemly "
-                        "detail of her "
-                        "death.” Despite "
-                        "having lived "
-                        "through negative "
-                        "experiences, "
-                        "Ophelia retains a "
-                        "childlike "
-                        "innocence in these "
-                        "rewritings. For "
-                        "example, New "
-                        "Hamlet by Lao She "
-                        "(penname of Shu "
-                        "Qingchun, "
-                        "1899-1966) "
-                        "parodies China’s "
-                        "“Hamlet complex” "
-                        "(the inability to "
-                        "act at a time of "
-                        "national crisis) "
-                        "and the "
-                        "fascination with "
-                        "an Ophelia "
-                        "submerged in "
-                        "water. Both "
-                        "Ophelia and "
-                        "Millais’s painting "
-                        "are featured in "
-                        "two of Japanese "
-                        "writer Natsume "
-                        "Sōseki’s early "
-                        "twentieth-century "
-                        "novels. A second "
-                        "approach "
-                        "emphasizes the "
-                        "local context. "
-                        "Adapters used "
-                        "local values to "
-                        "engage with and "
-                        "even critique the "
-                        "Victorian "
-                        "narrative "
-                        "tradition of "
-                        "moralization. Late "
-                        "nineteenth-century "
-                        "translator Lin Shu "
-                        "(1852-1924), for "
-                        "example, tones "
-                        "down the "
-                        "sentimentalization "
-                        "of Ophelia in his "
-                        "classical Chinese "
-                        "rewriting of "
-                        "Charles and Mary "
-                        "Lamb’s Tales from "
-                        "Shakespeare, "
-                        "showcasing the "
-                        "conflict between "
-                        "Victorian and "
-                        "Confucian moral "
-                        "codes. The third "
-                        "approach focuses "
-                        "upon an "
-                        "objectified and "
-                        "sexualized "
-                        "Ophelia. As other "
-                        "chapters in this "
-                        "volume "
-                        "demonstrate, this "
-                        "is not exclusively "
-                        "an Asian "
-                        "phenomenon. "
-                        "However, the "
-                        "eroticism "
-                        "associated with "
-                        "the Ophelia figure "
-                        "in a number of "
-                        "Asian stage and "
-                        "screen versions of "
-                        "Hamlet, such as "
-                        "Sherwood Hu’s film "
-                        "Prince of the "
-                        "Himalayas (2006), "
-                        "aligns Ophelia "
-                        "with East Asian "
-                        "ideals of "
-                        "femininity, but "
-                        "also brings out "
-                        "the sexuality that "
-                        "is latent or "
-                        "suppressed in "
-                        "Victorian "
-                        "interpretations. "
-                        "They do so by "
-                        "aligning Ophelia "
-                        "with East Asian "
-                        "ideals of "
-                        "femininity."
-                    ),
-                    "type": {"id": "other", "title": {"en": "Other"}},
-                }
-            ],
+            # "additional_descriptions": [
+            #     {
+            #         "description": (
+            #             "There are three main East Asian approaches to "
+            #             "interpreting Ophelia. The first is informed by "
+            #             "the fascination with and reaction against the "
+            #             "Victorian pictorialization of Ophelia, especially "
+            #             "John Everett Millais’s famous Ophelia (1851), "
+            #             "that emphasized, as Kimberly Rhodes describes, her "
+            #             "“pathos, innocence, and beauty rather than the "
+            #             "unseemly detail of her death.” Despite having "
+            #             "lived through negative experiences, Ophelia "
+            #             "retains a childlike innocence in these rewritings. "
+            #             "For example, New Hamlet by Lao She (penname of "
+            #             "Shu Qingchun, 1899-1966) parodies China’s "
+            #             "Hamlet complex against the Victorian pictorialization "
+            #             "of Ophelia, especially John Everett Millais’s "
+            #             "famous Ophelia (1851), that emphasized, as Kimberly "
+            #             "Rhodes describes, her “pathos, innocence, and "
+            #             "beauty rather than the unseemly detail of her "
+            #             "death.” Despite having lived through negative "
+            #             "experiences, Ophelia retains a childlike "
+            #             "innocence in these rewritings. For example, New "
+            #             "Hamlet by Lao She (penname of Shu Qingchun, "
+            #             "1899-1966) parodies China’s Hamlet complex "
+            #             "against the Victorian pictorialization of Ophelia, "
+            #             "especially John Everett Millais’s famous Ophelia "
+            #             "(1851), that emphasized, as Kimberly Rhodes describes, "
+            #             "her “pathos, innocence, and beauty rather than the "
+            #             "unseemly detail of her death.” Despite having lived "
+            #             "through negative experiences, Ophelia retains a "
+            #             "childlike innocence in these rewritings. For example, "
+            #             "New Hamlet by Lao She (penname of Shu Qingchun, "
+            #             "1899-1966) parodies China’s Hamlet complex "
+            #             "against the Victorian pictorialization of Ophelia, "
+            #             "especially John Everett Millais’s famous Ophelia "
+            #             "(1851), that emphasized, as Kimberly Rhodes describes, "
+            #             "her “pathos, innocence, and beauty rather than the "
+            #             "unseemly detail of her death.” Despite having lived "
+            #             "through negative experiences, Ophelia retains a "
+            #             "childlike innocence in these rewritings. For example, "
+            #             "New Hamlet by Lao She (penname of Shu Qingchun, "
+            #             "1899-1966) “Hamlet complex” (the inability to "
+            #             "act at a time of national crisis) and the "
+            #             "fascination with an Ophelia submerged in water. "
+            #             "Both Ophelia and Millais’s painting are featured in "
+            #             "two of Japanese writer Natsume Sōseki’s early "
+            #             "twentieth-century novels. A second approach "
+            #             "emphasizes the local context. Adapters used "
+            #             "local values to engage with and even critique the "
+            #             "Victorian narrative tradition of moralization. Late "
+            #             "nineteenth-century translator Lin Shu "
+            #             "(1852-1924), for example, tones "
+            #             "down the sentimentalization of Ophelia in his "
+            #             "classical Chinese rewriting of Charles and Mary "
+            #             "Lamb’s Tales from Shakespeare, showcasing the "
+            #             "conflict between Victorian and Confucian moral codes. "
+            #             "The third approach focuses upon an objectified "
+            #             "and sexualized Ophelia. As other chapters in "
+            #             "this volume demonstrate, this is not "
+            #             "exclusively an Asian phenomenon. However, the "
+            #             "eroticism associated with the Ophelia figure "
+            #             "in a number of Asian stage and screen versions of "
+            #             "Hamlet, such as Sherwood Hu’s film Prince of "
+            #             "the Himalayas (2006), aligns Ophelia with East "
+            #             "Asian ideals of femininity, but also brings out "
+            #             "the sexuality that is latent or suppressed in "
+            #             "Victorian interpretations. They do so by "
+            #             "aligning Ophelia with East Asian ideals of "
+            #             "femininity."
+            #         ),
+            #         "type": {"id": "other", "title": {"en": "Other"}},
+            #     }
+            # ],
             "creators": [
                 {
                     "affiliations": [{"name": "George Washington U"}],
@@ -294,7 +220,9 @@
                     "icon": "cc-by-nc-icon",
                     "props": {
                         "scheme": "spdx",
-                        "url": "https://creativecommons.org/licenses/by-nc/4.0/legalcode",
+                        "url": (
+                            "https://creativecommons.org/licenses/by-nc/4.0/legalcode"
+                        ),
                     },
                     "title": {
                         "en": (
@@ -328,12 +256,25 @@
             ],
             "publisher": "Palgrave",
             "title": (
-                "The Paradox of Female Agency: Ophelia and East Asian "
-                "Sensibilities"
+                "The Paradox of Female Agency: Ophelia and East Asian " "Sensibilities"
             ),
         },
-        "parent": {"access": {"owned_by": [{"user": "49"}]}},
-        "updated": "2017-09-16T16:41:52Z",
+        "parent": {
+            "access": {
+                "owned_by": [
+                    {
+                        "email": "test@example.com",
+                        "identifiers": [
+                            {
+                                "identifier": "49",
+                                "scheme": "neh_user_id",
+                            }
+                        ],
+                    }
+                ]
+            }
+        },
+        # "updated": "2017-09-16T16:41:52Z",
     },
     "expected_serialized": {
         "record_source": "knowledgeCommons",
@@ -356,9 +297,7 @@
             "hclegacy:groups_for_deposit": [
                 {
                     "group_identifier": "174",
-                    "group_name": (
-                        "East Asian Languages and Literatures after 1900"
-                    ),
+                    "group_name": ("East Asian Languages and Literatures after 1900"),
                 },
                 {
                     "group_identifier": "246",
@@ -523,8 +462,7 @@
             ],
             "publisher": "Palgrave",
             "title": (
-                "The Paradox of Female Agency: Ophelia and East Asian "
-                "Sensibilities"
+                "The Paradox of Female Agency: Ophelia and East Asian " "Sensibilities"
             ),
         },
         "parent": {"access": {"owned_by": [{"user": "49"}]}},
@@ -551,9 +489,7 @@
             "hclegacy:groups_for_deposit": [
                 {
                     "group_identifier": "174",
-                    "group_name": (
-                        "East Asian Languages and Literatures after 1900"
-                    ),
+                    "group_name": ("East Asian Languages and Literatures after 1900"),
                 },
                 {
                     "group_identifier": "246",
@@ -799,11 +735,10 @@
             ],
             "publisher": "Palgrave",
             "title": (
-                "The Paradox of Female Agency: Ophelia and East Asian "
-                "Sensibilities"
+                "The Paradox of Female Agency: Ophelia and East Asian " "Sensibilities"
             ),
         },
         "parent": {"access": {"owned_by": [{"user": "49"}]}},
-        "updated": "2017-09-16T16:41:52Z",
+        # "updated": "2017-09-16T16:41:52Z",
     },
 }
diff --git a/site/tests/helpers/sample_records/sample16079.py b/site/tests/helpers/sample_records/sample_metadata_conference_proceedings_pdf.py
similarity index 98%
rename from site/tests/helpers/sample_records/sample16079.py
rename to site/tests/helpers/sample_records/sample_metadata_conference_proceedings_pdf.py
index c40666ff5..2db6355bc 100644
--- a/site/tests/helpers/sample_records/sample16079.py
+++ b/site/tests/helpers/sample_records/sample_metadata_conference_proceedings_pdf.py
@@ -1,4 +1,4 @@
-rec16079 = {
+sample_metadata_conference_proceedings_pdf = {
     "input": {
         "created": "2017-10-26T12:31:39Z",
         "pids": {
@@ -175,9 +175,7 @@
                             "https://creativecommons.org/licenses/by/4.0/legalcode"
                         ),
                     },
-                    "title": {
-                        "en": "Creative Commons Attribution 4.0 International"
-                    },
+                    "title": {"en": "Creative Commons Attribution 4.0 International"},
                 }
             ],
             "subjects": [
@@ -598,13 +596,10 @@
                     "props": {
                         "scheme": "spdx",
                         "url": (
-                            "https://creativecommons.org/licenses/by/"
-                            "4.0/legalcode"
+                            "https://creativecommons.org/licenses/by/" "4.0/legalcode"
                         ),
                     },
-                    "title": {
-                        "en": "Creative Commons Attribution 4.0 International"
-                    },
+                    "title": {"en": "Creative Commons Attribution 4.0 International"},
                 }
             ],
             "subjects": [
diff --git a/site/tests/helpers/sample_records/sample44881.py b/site/tests/helpers/sample_records/sample_metadata_interview_transcript_pdf.py
similarity index 99%
rename from site/tests/helpers/sample_records/sample44881.py
rename to site/tests/helpers/sample_records/sample_metadata_interview_transcript_pdf.py
index 678d702a7..b316ae65d 100644
--- a/site/tests/helpers/sample_records/sample44881.py
+++ b/site/tests/helpers/sample_records/sample_metadata_interview_transcript_pdf.py
@@ -1,4 +1,4 @@
-rec44881 = {
+sample_metadata_interview_transcript_pdf = {
     "input": {
         "pids": {
             "doi": {
diff --git a/site/tests/helpers/sample_records/sample42615.py b/site/tests/helpers/sample_records/sample_metadata_journal_article2_pdf.py
similarity index 97%
rename from site/tests/helpers/sample_records/sample42615.py
rename to site/tests/helpers/sample_records/sample_metadata_journal_article2_pdf.py
index deca8c7da..8d9c1b39a 100644
--- a/site/tests/helpers/sample_records/sample42615.py
+++ b/site/tests/helpers/sample_records/sample_metadata_journal_article2_pdf.py
@@ -1,4 +1,4 @@
-rec42615 = {
+sample_metadata_journal_article2_pdf = {
     "input": {
         "created": "2021-11-10T15:06:20Z",
         "pids": {
@@ -32,8 +32,7 @@
                 "issue": "2",
                 "pages": "359-370",
                 "title": (
-                    "Journal of Traditional Building, Architecture and "
-                    "Urbanism"
+                    "Journal of Traditional Building, Architecture and " "Urbanism"
                 ),
             },
             "kcr:commons_domain": "sah.hcommons.org",
@@ -60,8 +59,7 @@
             "entries": {
                 "palazzo-vernacular_patterns_in_portugal_and_brazil-2021.pdf": {  # noqa: E501
                     "key": (
-                        "palazzo-vernacular_patterns_in_portugal_and_b"
-                        "razil-2021.pdf"
+                        "palazzo-vernacular_patterns_in_portugal_and_b" "razil-2021.pdf"
                     ),
                     "mimetype": "application/pdf",
                     "size": "17181",
@@ -203,7 +201,18 @@
                 "and Adaptations"
             ),
         },
-        "parent": {"access": {"owned_by": [{"user": "1011841"}]}},
+        "parent": {
+            "access": {
+                "owned_by": [
+                    {
+                        "email": "test@example.com",
+                        "identifiers": [
+                            {"identifier": "1011841", "scheme": "neh_user_id"}
+                        ],
+                    }
+                ]
+            }
+        },
         "updated": "2021-11-10T15:06:20Z",
     },
     "expected_serialized": {
@@ -240,8 +249,7 @@
                 "issue": "2",
                 "pages": "359-370",
                 "title": (
-                    "Journal of Traditional Building, Architecture and "
-                    "Urbanism"
+                    "Journal of Traditional Building, Architecture and " "Urbanism"
                 ),
             },
             "kcr:commons_domain": "sah.hcommons.org",
@@ -268,8 +276,7 @@
             "entries": {
                 "palazzo-vernacular_patterns_in_portugal_and_brazil-2021.pdf": {  # noqa: E501
                     "key": (
-                        "palazzo-vernacular_patterns_in_portugal_and_b"
-                        "razil-2021.pdf"
+                        "palazzo-vernacular_patterns_in_portugal_and_b" "razil-2021.pdf"
                     ),
                     "mimetype": "application/pdf",
                     "size": "17181",
@@ -428,8 +435,7 @@
                 "issue": "2",
                 "pages": "359-370",
                 "title": (
-                    "Journal of Traditional Building, Architecture and "
-                    "Urbanism"
+                    "Journal of Traditional Building, Architecture and " "Urbanism"
                 ),
             },
             "kcr:commons_domain": "sah.hcommons.org",
@@ -456,8 +462,7 @@
             "entries": {
                 "palazzo-vernacular_patterns_in_portugal_and_brazil-2021.pdf": {  # noqa: E501
                     "key": (
-                        "palazzo-vernacular_patterns_in_portugal_and_b"
-                        "razil-2021.pdf"
+                        "palazzo-vernacular_patterns_in_portugal_and_b" "razil-2021.pdf"
                     ),
                     "mimetype": "application/pdf",
                     "size": "17181",
diff --git a/site/tests/helpers/sample_records/sample33383.py b/site/tests/helpers/sample_records/sample_metadata_journal_article_pdf.py
similarity index 91%
rename from site/tests/helpers/sample_records/sample33383.py
rename to site/tests/helpers/sample_records/sample_metadata_journal_article_pdf.py
index 8afb02e3d..27dc9377b 100644
--- a/site/tests/helpers/sample_records/sample33383.py
+++ b/site/tests/helpers/sample_records/sample_metadata_journal_article_pdf.py
@@ -1,12 +1,14 @@
-rec33383 = {
+sample_metadata_journal_article_pdf = {
     "input": {
         "access": {
-            "embargo": {"active": True, "reason": None, "until": "2030-11-25"},
-            "files": "restricted",
+            # "embargo": {"active": True, "reason": None, "until": "2030-11-25"},
+            # "files": "restricted",
+            "files": "public",
             "record": "public",
-            "status": "embargoed",
+            # "status": "embargoed",
+            "status": "open",
         },
-        "created": "2020-11-25T12:35:10Z",
+        # "created": "2020-11-25T12:35:10Z",
         "custom_fields": {
             "hclegacy:collection": "hccollection:1",
             "hclegacy:file_location": (
@@ -16,30 +18,28 @@
                 "pdf"
             ),
             "hclegacy:file_pid": "hc:33384",
-            "hclegacy:groups_for_deposit": [
-                {
-                    "group_identifier": "1004129",
-                    "group_name": "Arabic script manuscripts",
-                },
-                {
-                    "group_identifier": "1001234",
-                    "group_name": "Islamicate Studies",
-                },
-                {
-                    "group_identifier": "1000753",
-                    "group_name": "Medieval Studies",
-                },
-                {
-                    "group_identifier": "1000830",
-                    "group_name": "Science Studies and the History of Science",
-                },
-            ],
+            # "hclegacy:groups_for_deposit": [
+            #     {
+            #         "group_identifier": "1004129",
+            #         "group_name": "Arabic script manuscripts",
+            #     },
+            #     {
+            #         "group_identifier": "1001234",
+            #         "group_name": "Islamicate Studies",
+            #     },
+            #     {
+            #         "group_identifier": "1000753",
+            #         "group_name": "Medieval Studies",
+            #     },
+            #     {
+            #         "group_identifier": "1000830",
+            #         "group_name": "Science Studies and the History of Science",
+            #     },
+            # ],
             "hclegacy:previously_published": "published",
             "hclegacy:record_change_date": "2023-01-23T14:20:48Z",
             "hclegacy:record_creation_date": "2020-11-25T12:35:10Z",
-            "hclegacy:submitter_affiliation": (
-                "University of Southern California"
-            ),
+            "hclegacy:submitter_affiliation": ("University of Southern California"),
             "hclegacy:submitter_id": "1008812",
             "hclegacy:submitter_org_memberships": ["hc"],
             "hclegacy:total_views": 103,
@@ -60,7 +60,7 @@
             ],
         },
         "files": {
-            "default_preview": "24519197_005_03-04_s004_text.pdf",
+            # "default_preview": "24519197_005_03-04_s004_text.pdf",
             "enabled": True,
             "entries": {
                 "24519197_005_03-04_s004_text.pdf": {
@@ -118,9 +118,7 @@
             # ],
             "creators": [
                 {
-                    "affiliations": [
-                        {"name": "University of Southern California"}
-                    ],
+                    "affiliations": [{"name": "University of Southern California"}],
                     "person_or_org": {
                         "family_name": "Roberts",
                         "given_name": "Alexandre",
@@ -172,11 +170,7 @@
                 "philologically."
             ),
             "identifiers": [
-                {"identifier": "hc:33383", "scheme": "hclegacy-pid"},
-                {
-                    "identifier": "1000360-40298",
-                    "scheme": "hclegacy-record-id",
-                },
+                {"identifier": "1234567890", "scheme": "import-recid"},
                 {"identifier": "10.1163/24519197-BJA10007", "scheme": "doi"},
                 {"identifier": "2451-9197", "scheme": "issn"},
             ],
@@ -185,20 +179,7 @@
             "resource_type": {"id": "textDocument-journalArticle"},
             "rights": [
                 {
-                    "description": {
-                        "en": (
-                            "Proprietary material. No permissions are "
-                            "granted for any kind of copyring or "
-                            "re-use. All rights reserved"
-                        )
-                    },
                     "id": "arr",
-                    "icon": "copyright",
-                    "props": {
-                        "url": (
-                            "https://en.wikipedia.org/wiki/All_rights_reserved"
-                        )
-                    },
                     "title": {"en": "All Rights Reserved"},
                 }
             ],
@@ -231,7 +212,26 @@
                 "University"
             ),
         },
-        "parent": {"access": {"owned_by": [{"user": "1008812"}]}},
+        "parent": {
+            "access": {
+                "owned_by": [
+                    {
+                        "email": "test@example.com",
+                        "identifiers": [
+                            {"identifier": "test", "scheme": "neh_user_id"},
+                        ],
+                    },
+                    {
+                        "full_name": "John Doe",
+                        "email": "john.doe@example.com",
+                        "identifiers": [
+                            {"identifier": "0000-0002-1825-0097", "scheme": "orcid"},
+                            {"identifier": "jdoe", "scheme": "kc_username"},
+                        ],
+                    },
+                ]
+            }
+        },
         "pids": {
             "doi": {
                 "client": "datacite",
@@ -239,7 +239,7 @@
                 "provider": "datacite",
             }
         },
-        "updated": "2023-01-23T14:20:48Z",
+        # "updated": "2023-01-23T14:20:48Z",
     },
     "expected_serialized": {
         "record_source": "knowledgeCommons",
@@ -280,9 +280,7 @@
             "hclegacy:previously_published": "published",
             "hclegacy:record_change_date": "2023-01-23T14:20:48Z",
             "hclegacy:record_creation_date": "2020-11-25T12:35:10Z",
-            "hclegacy:submitter_affiliation": (
-                "University of Southern California"
-            ),
+            "hclegacy:submitter_affiliation": ("University of Southern California"),
             "hclegacy:submitter_id": "1008812",
             "hclegacy:submitter_org_memberships": ["hc"],
             "hclegacy:total_views": 103,
@@ -361,9 +359,7 @@
             # ],
             "creators": [
                 {
-                    "affiliations": [
-                        {"name": "University of Southern California"}
-                    ],
+                    "affiliations": [{"name": "University of Southern California"}],
                     "person_or_org": {
                         "family_name": "Roberts",
                         "given_name": "Alexandre",
@@ -505,9 +501,7 @@
             "hclegacy:previously_published": "published",
             "hclegacy:record_change_date": "2023-01-23T14:20:48Z",
             "hclegacy:record_creation_date": "2020-11-25T12:35:10Z",
-            "hclegacy:submitter_affiliation": (
-                "University of Southern California"
-            ),
+            "hclegacy:submitter_affiliation": ("University of Southern California"),
             "hclegacy:submitter_id": "1008812",
             "hclegacy:submitter_org_memberships": ["hc"],
             "hclegacy:total_views": 103,
@@ -593,9 +587,7 @@
             # ],
             "creators": [
                 {
-                    "affiliations": [
-                        {"name": "University of Southern California"}
-                    ],
+                    "affiliations": [{"name": "University of Southern California"}],
                     "person_or_org": {
                         "family_name": "Roberts",
                         "given_name": "Alexandre",
diff --git a/site/tests/helpers/sample_records/sample38367.py b/site/tests/helpers/sample_records/sample_metadata_thesis_pdf.py
similarity index 97%
rename from site/tests/helpers/sample_records/sample38367.py
rename to site/tests/helpers/sample_records/sample_metadata_thesis_pdf.py
index 711fb0701..16a332d66 100644
--- a/site/tests/helpers/sample_records/sample38367.py
+++ b/site/tests/helpers/sample_records/sample_metadata_thesis_pdf.py
@@ -1,4 +1,4 @@
-rec38367 = {
+sample_metadata_thesis_pdf = {
     "input": {
         "created": "2021-04-26T05:57:56Z",
         "custom_fields": {
@@ -42,9 +42,7 @@
                 "macroeconomics",
                 "modelling",
             ],
-            "thesis:university": (
-                "Universidad Nacional Autónoma de México (UNAM)"
-            ),
+            "thesis:university": ("Universidad Nacional Autónoma de México (UNAM)"),
         },
         "files": {
             "default_preview": (
@@ -132,9 +130,7 @@
             ],
             "creators": [
                 {
-                    "affiliations": [
-                        {"name": "Université Sorbonne Paris Nord"}
-                    ],
+                    "affiliations": [{"name": "Université Sorbonne Paris Nord"}],
                     "person_or_org": {
                         "family_name": "Martínez Hernández",
                         "given_name": "Alberto-Gabino",
@@ -228,9 +224,7 @@
                     "id": "arr",
                     "icon": "copyright",
                     "props": {
-                        "url": (
-                            "https://en.wikipedia.org/wiki/All_rights_reserved"
-                        )
+                        "url": ("https://en.wikipedia.org/wiki/All_rights_reserved")
                     },
                     "title": {"en": "All Rights Reserved"},
                 }
@@ -313,9 +307,7 @@
                 "macroeconomics",
                 "modelling",
             ],
-            "thesis:university": (
-                "Universidad Nacional Autónoma de México (UNAM)"
-            ),
+            "thesis:university": ("Universidad Nacional Autónoma de México (UNAM)"),
         },
         "files": {
             "default_preview": (
@@ -336,9 +328,7 @@
         "metadata": {
             "creators": [
                 {
-                    "affiliations": [
-                        {"name": "Université Sorbonne Paris Nord"}
-                    ],
+                    "affiliations": [{"name": "Université Sorbonne Paris Nord"}],
                     "person_or_org": {
                         "family_name": "Martínez Hernández",
                         "given_name": "Alberto-Gabino",
@@ -566,9 +556,7 @@
                 "macroeconomics",
                 "modelling",
             ],
-            "thesis:university": (
-                "Universidad Nacional Autónoma de México (UNAM)"
-            ),
+            "thesis:university": ("Universidad Nacional Autónoma de México (UNAM)"),
         },
         "files": {
             "default_preview": (
@@ -596,9 +584,7 @@
         "metadata": {
             "creators": [
                 {
-                    "affiliations": [
-                        {"name": "Université Sorbonne Paris Nord"}
-                    ],
+                    "affiliations": [{"name": "Université Sorbonne Paris Nord"}],
                     "person_or_org": {
                         "family_name": "Martínez Hernández",
                         "given_name": "Alberto-Gabino",
diff --git a/site/tests/helpers/sample_records/sample48799.py b/site/tests/helpers/sample_records/sample_metadata_white_paper_pdf.py
similarity index 98%
rename from site/tests/helpers/sample_records/sample48799.py
rename to site/tests/helpers/sample_records/sample_metadata_white_paper_pdf.py
index 2f33c3d8d..636d9c1d7 100644
--- a/site/tests/helpers/sample_records/sample48799.py
+++ b/site/tests/helpers/sample_records/sample_metadata_white_paper_pdf.py
@@ -1,4 +1,4 @@
-rec48799 = {
+sample_metadata_white_paper_pdf = {
     "input": {
         "created": "2022-09-29T14:34:36Z",
         "custom_fields": {
@@ -179,13 +179,10 @@
                     "props": {
                         "scheme": "spdx",
                         "url": (
-                            "https://creativecommons.org/licenses/by/"
-                            "4.0/legalcode"
+                            "https://creativecommons.org/licenses/by/" "4.0/legalcode"
                         ),
                     },
-                    "title": {
-                        "en": "Creative Commons Attribution 4.0 International"
-                    },
+                    "title": {"en": "Creative Commons Attribution 4.0 International"},
                 }
             ],
             "title": "Super Apps: A Platform Lab Report",
@@ -569,15 +566,10 @@
                     "props": {
                         "scheme": "spdx",
                         "url": (
-                            "https://creativecommons.org/licenses/by/4.0/"
-                            "legalcode"
+                            "https://creativecommons.org/licenses/by/4.0/" "legalcode"
                         ),
                     },
-                    "title": {
-                        "en": (
-                            "Creative Commons Attribution 4.0 International"
-                        )
-                    },
+                    "title": {"en": ("Creative Commons Attribution 4.0 International")},
                 }
             ],
             "title": "Super Apps: A Platform Lab Report",