From e4fc84d42fb798c540a67051e35b541e465d5f9a Mon Sep 17 00:00:00 2001 From: Ian Scott Date: Wed, 28 May 2025 19:52:20 -0400 Subject: [PATCH 1/7] fix(deps): Updates to invenio-modular-deposit-form with bug fixes and refactoring for language fields, remote select widgets, field labels. --- site/kcworks/dependencies/invenio-modular-deposit-form | 2 +- uv.lock | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/site/kcworks/dependencies/invenio-modular-deposit-form b/site/kcworks/dependencies/invenio-modular-deposit-form index a2972edd7..aee82bbc3 160000 --- a/site/kcworks/dependencies/invenio-modular-deposit-form +++ b/site/kcworks/dependencies/invenio-modular-deposit-form @@ -1 +1 @@ -Subproject commit a2972edd7bff8295d581e4a38013b3d87a88dd8a +Subproject commit aee82bbc3ecf56e9922518d38dbcd1406b55d944 diff --git a/uv.lock b/uv.lock index 949596bf9..cc4e258ee 100644 --- a/uv.lock +++ b/uv.lock @@ -3211,7 +3211,7 @@ wheels = [ [[package]] name = "kcworks" -version = "0.5.0b13" +version = "0.5.1" source = { editable = "." 
} dependencies = [ { name = "aiohttp" }, From 11ce20fc74a5703ac034317198a1db9aea190a51 Mon Sep 17 00:00:00 2001 From: Ian Scott Date: Wed, 28 May 2025 19:16:22 -0400 Subject: [PATCH 2/7] wip: Implemented CLI to make bulk uniform updates across a collection; Working on getting last test running --- docs/source/reference/cli_commands.md | 30 +- site/kcworks/cli.py | 5 + .../services/records/bulk_operations.py | 96 ++++++ site/kcworks/services/records/cli.py | 64 ++++ tests/api/test_api_record_ops.py | 73 ++++- tests/api/test_bulk_operations.py | 290 ++++++++++++++++++ tests/cli/test_kcworks_cli.py | 231 ++++++++++++++ tests/fixtures/records.py | 46 +++ tests/fixtures/search_provisioning.py | 11 +- translations/en/LC_MESSAGES/messages.po | 48 ++- translations/messages.pot | 48 ++- 11 files changed, 926 insertions(+), 16 deletions(-) create mode 100644 site/kcworks/services/records/bulk_operations.py create mode 100644 site/kcworks/services/records/cli.py create mode 100644 tests/api/test_bulk_operations.py create mode 100644 tests/cli/test_kcworks_cli.py diff --git a/docs/source/reference/cli_commands.md b/docs/source/reference/cli_commands.md index 1005b8fa1..5040a109c 100644 --- a/docs/source/reference/cli_commands.md +++ b/docs/source/reference/cli_commands.md @@ -55,9 +55,37 @@ KCWorks includes a number of custom CLI commands that are not part of the core I - destroys search indices for the KCWorks instance that are *not* destroyed by the main KCWorks index destroy command. These are primarily the indices for storing usage events and aggregated usage data. - **WARNING:** This data *only* exists in the OpenSearch indices. It is not backed up by the database and will be lost if the indices are destroyed. Use this command with extreme caution. 
+### `invenio kcworks-records` + +- **provided by the main KCWorks package** (kcworks/site/cli.py and kcworks/services/records/cli.py) + + +#### `invenio kcworks-records bulk-update` + +Updates a single metadata field to a single new fixed value for **every** record in a community. + +Arguments: +- `community_id`: the ID (the UUID) of the collection to update. +- `metadata_field`: the dot-separated path of the field to update (e.g., `metadata.title`). +- `new_value`: the new value to set for the field. + +Example: +```shell +invenio kcworks-records bulk-update 3f2b1c9e-8d4a-4c6b-9a1e-2f7d5b0c4e1a metadata.title "New Title" +``` + +```{note} +Note that the `new_value` argument may be either a Python literal or a plain string. Anything that cannot be parsed as a Python literal will be treated as a plain string. +``` + +```{note} +Also note that the `community_id` argument is the ID (the UUID) of the collection, not the collection name or its URL slug. If you're not sure what the collection ID is, you can find it by looking at the API response for the collection. +``` + + ### `invenio kcworks-users` -- **provided by the main KCWorks package** (kcworks/site/cli.py) +- **provided by the main KCWorks package** (kcworks/site/cli.py and kcworks/services/users/cli.py) #### `invenio kcworks-users name-parts` Either reads or updates the dictionary of name parts that KCWorks will use to construct the full name of a user (e.g., first name, last name, middle name, etc.) for display in the UI and in creating record metadata. 
diff --git a/site/kcworks/cli.py b/site/kcworks/cli.py index feb0ad428..dce308e2e 100644 --- a/site/kcworks/cli.py +++ b/site/kcworks/cli.py @@ -26,6 +26,7 @@ from kcworks.services.users.cli import name_parts as name_parts_command from kcworks.services.users.cli import read as read_command from kcworks.services.users.cli import user_groups as user_groups_command +from kcworks.services.records.cli import kcworks_records as records_command UNMANAGED_INDICES = [ "kcworks-stats-record-view", @@ -106,3 +107,7 @@ def destroy_indices(force): ) as bar: for name, _response in bar: bar.label = name + + +# Register the records command group +kcworks_users.add_command(records_command) diff --git a/site/kcworks/services/records/bulk_operations.py b/site/kcworks/services/records/bulk_operations.py new file mode 100644 index 000000000..8a83beb24 --- /dev/null +++ b/site/kcworks/services/records/bulk_operations.py @@ -0,0 +1,96 @@ +"""Bulk operations for records.""" + +from pprint import pformat +from typing import Any, TypedDict + +from flask import current_app +from invenio_access.permissions import system_identity +from invenio_communities.proxies import current_communities +from invenio_pidstore.errors import PIDDoesNotExistError +from invenio_rdm_records.proxies import current_rdm_records_service +from invenio_record_importer_kcworks.utils.utils import replace_value_in_nested_dict +from invenio_search.proxies import current_search_client +from kcworks.utils.utils import get_value_by_path +from opensearchpy.helpers.search import Search + + +class UpdateResult(TypedDict): + total_record_count: int + updated_record_count: int + failed_record_count: int + updated_records: list[dict[str, Any]] + errors: list[str] + + +def update_community_records_metadata( + community_id: str, metadata_field: str, new_value: Any +) -> UpdateResult: + """Update a specific metadata field for all records in a community. 
+ + Args: + community_id (str): The ID of the community whose records should be updated + metadata_field (str): The metadata field to update (e.g. 'metadata.title') + new_value (any): The new value to set for the field + + Returns: + UpdateResult: A summary of the operation including: + - total_record_count (int): Total number of records found + - updated_record_count (int): Number of records successfully updated + - failed_record_count (int): Number of records that failed to update + - updated_records (list[dict]): List of dictionaries representing + the records successfully updated, each of which with the keys: + - id (str) + - metadata_field (str) + - old_value (any) + - new_value (any) + - errors (list[str]): List of error messages for failed updates + """ + results: UpdateResult = { + "total_record_count": 0, + "updated_record_count": 0, + "failed_record_count": 0, + "updated_records": [], + "errors": [], + } + + try: + current_communities.service.read(system_identity, community_id) + except PIDDoesNotExistError: + raise ValueError(f"Community {community_id} not found") + + prefix = current_app.config.get("SEARCH_INDEX_PREFIX", "") + search = Search(using=current_search_client, index=f"{prefix}rdmrecords-records") + search = search.filter("term", parent__communities__ids=community_id) + + # Use scan (scroll) to allow for more than 10k records + for hit in search.scan(): + current_app.logger.error(f"Processing page {pformat(hit)}") + results["total_record_count"] += 1 + + try: + # Update the record via a draft + draft = current_rdm_records_service.edit(system_identity, hit["id"]) + draft_data = draft.to_dict() + old_value = get_value_by_path(draft_data, metadata_field) + draft_data = replace_value_in_nested_dict( + draft_data, metadata_field.replace(".", "|"), new_value + ) + current_rdm_records_service.update_draft( + system_identity, draft.id, draft_data + ) + current_rdm_records_service.publish(system_identity, draft.id) + results["updated_record_count"] += 
1 + results["updated_records"].append( + { + "id": hit["id"], + "metadata_field": metadata_field, + "old_value": old_value, + "new_value": new_value, + } + ) + + except Exception as e: + results["failed_record_count"] += 1 + results["errors"].append(f"Failed to update record {hit['id']}: {str(e)}") + + return results diff --git a/site/kcworks/services/records/cli.py b/site/kcworks/services/records/cli.py new file mode 100644 index 000000000..dd428ed5e --- /dev/null +++ b/site/kcworks/services/records/cli.py @@ -0,0 +1,64 @@ +"""CLI commands for record operations.""" + +import ast +from pprint import pformat + +import click +from flask.cli import with_appcontext + +from kcworks.services.records.bulk_operations import update_community_records_metadata + + +@click.group() +def kcworks_records(): + """CLI utility command group for record operations.""" + pass + + +@kcworks_records.command("bulk-update") +@click.argument("community_id", type=str, required=True) +@click.argument("metadata_field", type=str, required=True) +@click.argument("new_value", type=str, required=True) +@with_appcontext +def bulk_update(community_id: str, metadata_field: str, new_value: str) -> None: + """Update a metadata field for all records in a community. + + Parameters: + community_id (str): The ID of the community whose records should be updated + metadata_field (str): The metadata field to update (e.g. 'metadata.title') + new_value (str): The new value to set for the field. If it's a valid Python literal + (e.g. '"string"', '123', '["list", "of", "items"]'), it will be parsed as such. + Otherwise, it will be treated as a string. 
+ """ + try: + # First try to parse as a Python literal + parsed_value = ast.literal_eval(new_value) + except (SyntaxError, ValueError): + # If parsing fails, use the value as-is + parsed_value = new_value + + print( + f"Updating {metadata_field} to '{parsed_value}' " + f"for all records in community {community_id}" + ) + + try: + results = update_community_records_metadata( + community_id=community_id, + metadata_field=metadata_field, + new_value=parsed_value, + ) + except ValueError as e: + print(f"Error updating records: {e}") + return + + print("\nResults:") + print(f"Total records found: {results['total_record_count']}") + print(f"Successfully updated: {results['updated_record_count']}") + print(f"Failed to update: {results['failed_record_count']}") + print(f"Updated records: {pformat(results['updated_records'])}") + + if results["errors"]: + print("\nErrors:") + for error in results["errors"]: + print(f"- {error}") diff --git a/tests/api/test_api_record_ops.py b/tests/api/test_api_record_ops.py index 56d1557ae..6e59bce57 100644 --- a/tests/api/test_api_record_ops.py +++ b/tests/api/test_api_record_ops.py @@ -21,6 +21,7 @@ from invenio_access.permissions import authenticated_user, system_identity from invenio_access.utils import get_identity from invenio_rdm_records.proxies import current_rdm_records_service as records_service +from invenio_record_importer_kcworks.utils.utils import replace_value_in_nested_dict from tests.conftest import RunningApp @@ -231,6 +232,12 @@ def skip_fields(self) -> list[str]: # noqa: D102 return ["metadata.title", "metadata.resource_type"] +@pytest.fixture +def db_session_options(): + """Configure database session options.""" + return {"expire_on_commit": False} + + def test_record_publication_api( running_app: RunningApp, db: SQLAlchemy, @@ -240,6 +247,7 @@ def test_record_publication_api( search_clear: Callable, celery_worker: Callable, mock_send_remote_api_update_fixture: Callable, + db_session_options: dict, ): """Test that a 
user can publish a draft record via the API.""" app = running_app.app @@ -247,32 +255,43 @@ def test_record_publication_api( u = user_factory( email=user_data_set["user1"]["email"], password="test", + admin=False, token=True, - admin=True, + saml_src=None, + saml_id=None, ) user = u.user token = u.allowed_token with app.test_client() as client: logged_in_client = client_with_login(client, user) + app.logger.error("Creating draft record...") response = logged_in_client.post( f"{app.config['SITE_API_URL']}/records", data=json.dumps(metadata.metadata_in), headers={**headers, "Authorization": f"Bearer {token}"}, ) + app.logger.error(f"Draft creation response: {pformat(response.json)}") assert response.status_code == 201 actual_draft = response.json actual_draft_id = actual_draft["id"] + app.logger.error(f"Draft ID: {actual_draft_id}") publish_response = logged_in_client.post( f"{app.config['SITE_API_URL']}/records/{actual_draft_id}/draft" "/actions/publish", headers={**headers, "Authorization": f"Bearer {token}"}, ) + app.logger.error(f"Publish response status: {publish_response.status_code}") + if publish_response.status_code != 202: + app.logger.error( + f"Publish response error: {pformat(publish_response.json)}" + ) assert publish_response.status_code == 202 actual_published = publish_response.json + app.logger.error(f"Published record: {pformat(actual_published)}") assert actual_published["id"] == actual_draft_id assert actual_published["is_published"] assert not actual_published["is_draft"] @@ -281,12 +300,13 @@ def test_record_publication_api( assert actual_published["versions"]["index"] == 1 assert actual_published["status"] == "published" + # Compare the published metadata with the expected metadata + metadata.compare_published(actual_published, by_api=True) + def test_record_publication_service( running_app: RunningApp, db: SQLAlchemy, - client_with_login: Callable, - headers: dict, user_factory: Callable, search_clear: Callable, celery_worker: Callable, @@ 
-419,18 +439,51 @@ def test_record_draft_update_service( assert actual_edited["revision_id"] == 7 # TODO: Why is this 7? -@pytest.mark.skip(reason="Not implemented") -def test_record_published_update( +def test_record_published_update_service( running_app: RunningApp, db: SQLAlchemy, - client_with_login: Callable, - headers: dict, + record_metadata: Callable, + minimal_published_record_factory: Callable, user_factory: Callable, search_clear: Callable, + celery_worker: Callable, mock_send_remote_api_update_fixture: Callable, ): """Test that a user can update a published record via the API.""" - pass + app = running_app.app + u = user_factory( + email=user_data_set["user1"]["email"], + password="test", + token=True, + admin=True, + ) + user = u.user + identity = get_identity(user) + app.logger.error(f"identity: {pformat(identity)}") + identity.provides.add(authenticated_user) + + metadata = record_metadata(owner_id=user.id) + + record = minimal_published_record_factory( + metadata=metadata.metadata_in, identity=identity + ) + running_app.app.logger.error(f"record: {pformat(record.to_dict())}") + record_id = record.id + + new_draft = records_service.edit(identity, record_id) + new_draft_data = copy.deepcopy(new_draft.data) + new_draft_data = replace_value_in_nested_dict( + new_draft_data, "metadata|title", "A Romans Story 2" + ) + + records_service.update_draft(identity, record_id, new_draft_data) + + published_record = records_service.publish(identity, record_id) + + assert published_record.to_dict()["metadata"]["title"] == "A Romans Story 2" + + updated_record = records_service.read(system_identity, record_id) + assert updated_record.to_dict()["metadata"]["title"] == "A Romans Story 2" @pytest.mark.skip(reason="Not implemented") @@ -574,8 +627,8 @@ def test_record_file_upload_api( "files/sample.pdf" ), "commit": ( - f"{app.config['SITE_API_URL']}/records/{draft_id}/draft/" - "files/sample.pdf/commit" + f"{app.config['SITE_API_URL']}/records/{draft_id}/draft/files/" 
+ "sample.pdf/commit" ), "iiif_api": ( f"{app.config['SITE_API_URL']}/iiif/draft:{draft_id}:sample.pdf" diff --git a/tests/api/test_bulk_operations.py b/tests/api/test_bulk_operations.py new file mode 100644 index 000000000..c3d740a28 --- /dev/null +++ b/tests/api/test_bulk_operations.py @@ -0,0 +1,290 @@ +"""Tests for bulk record operations.""" + +from pathlib import Path + +import pytest +from invenio_access.permissions import system_identity +from invenio_rdm_records.proxies import current_rdm_records_service +from kcworks.services.records.bulk_operations import update_community_records_metadata + + +def test_update_community_records_metadata( + running_app, + db, + minimal_published_record_factory, + minimal_community_factory, + search_clear, + celery_worker, + mock_send_remote_api_update_fixture, +): + """Test bulk updating metadata for records in a community.""" + # Create a test community + community = minimal_community_factory( + metadata={"title": "Test Community"}, + slug="test-community", + ) + community_id = community.id + + # Create some test records in the community + records = [] + for i in range(3): + record = minimal_published_record_factory( + metadata={ + "metadata": { + "resource_type": {"id": "textDocument-journalArticle"}, + "title": f"Test Record {i}", + "publisher": f"Test Publisher {i}", + "publication_date": "2025-01-01", + "creators": [ + { + "person_or_org": { + "name": f"Test Creator {i}", + "family_name": "Creator", + "given_name": "Test", + "type": "personal", + } + } + ], + }, + "files": { + "enabled": True, + "entries": { + "sample.pdf": { + "key": "sample.pdf", + "size": 13264, + "content_type": "application/pdf", + "mimetype": "application/pdf", + } + }, + }, + }, + community_list=[community_id], + file_paths=[ + Path( + Path(__file__).parent.parent + / "helpers" + / "sample_files" + / "sample.pdf" + ) + .absolute() + .as_posix() + ], + ) + records.append(record) + + # Update a metadata field for all records + results = 
update_community_records_metadata( + community_id=community_id, + metadata_field="metadata.publisher", + new_value="Updated publisher", + ) + + # Verify results + assert results["total_record_count"] == 3 + assert results["updated_record_count"] == 3 + assert results["failed_record_count"] == 0 + assert len(results["updated_records"]) == 3 + for i, record in enumerate(records): + assert results["updated_records"][i]["id"] == record.id + assert results["updated_records"][i]["metadata_field"] == "metadata.publisher" + assert results["updated_records"][i]["old_value"] == f"Test Publisher {i}" + assert results["updated_records"][i]["new_value"] == "Updated publisher" + assert len(results["errors"]) == 0 + + # Verify the updates were applied + for record in records: + updated_record = current_rdm_records_service.read(system_identity, record.id) + assert updated_record.data["metadata"]["publisher"] == "Updated publisher" + + +def test_update_community_records_metadata_nested_field( + running_app, + db, + minimal_published_record_factory, + minimal_community_factory, + search_clear, + celery_worker, + mock_send_remote_api_update_fixture, +): + """Test bulk updating nested metadata fields for records in a community.""" + # Create a test community + community = minimal_community_factory( + metadata={"title": "Test Community"}, + slug="test-community", + ) + community_id = community.id + + # Create a test record in the community + record = minimal_published_record_factory( + metadata={ + "metadata": { + "resource_type": {"id": "textDocument-journalArticle"}, + "title": "Test Record", + "publisher": "Test Publisher", + "publication_date": "2025-01-01", + "creators": [ + { + "person_or_org": { + "name": "Test Creator", + "family_name": "Creator", + "given_name": "Test", + "type": "personal", + } + } + ], + }, + "files": { + "enabled": False, + }, + }, + community_list=[community_id], + ) + + # Update a nested metadata field + results = update_community_records_metadata( + 
community_id=community_id, + metadata_field="metadata.creators.0.person_or_org.family_name", + new_value="New", + ) + + # Verify results + assert results["total_record_count"] == 1 + assert results["updated_record_count"] == 1 + assert results["failed_record_count"] == 0 + assert len(results["updated_records"]) == 1 + assert results["updated_records"][0]["id"] == record.id + assert ( + results["updated_records"][0]["metadata_field"] + == "metadata.creators.0.person_or_org.family_name" + ) + assert results["updated_records"][0]["old_value"] == "Creator" + assert results["updated_records"][0]["new_value"] == "New" + assert len(results["errors"]) == 0 + + # Verify the update was applied + updated_record = current_rdm_records_service.read(system_identity, record.id) + assert ( + updated_record.data["metadata"]["creators"][0]["person_or_org"]["family_name"] + == "New" + ) + assert ( + updated_record.data["metadata"]["creators"][0]["person_or_org"]["name"] + == "New, Test" + ) + + +def test_update_community_records_metadata_dict( + running_app, + db, + minimal_published_record_factory, + minimal_community_factory, + search_clear, + celery_worker, + mock_send_remote_api_update_fixture, +): + """Test bulk updating nested metadata fields for records in a community.""" + # Create a test community + community = minimal_community_factory( + metadata={ + "title": "Test Community", + }, + slug="test-community", + ) + community_id = community.id + + # Create a test record in the community + record = minimal_published_record_factory( + metadata={ + "metadata": { + "resource_type": {"id": "textDocument-journalArticle"}, + "title": "Test Record", + "publisher": "Test Publisher", + "publication_date": "2025-01-01", + "creators": [ + { + "person_or_org": { + "name": "Test Creator", + "family_name": "Creator", + "given_name": "Test", + "type": "personal", + } + } + ], + }, + "files": { + "enabled": False, + }, + }, + community_list=[community_id], + ) + + # Update a nested metadata 
field + results = update_community_records_metadata( + community_id=community_id, + metadata_field="metadata.creators", + new_value=[ + { + "person_or_org": { + "name": "New Creator", + "family_name": "Creator", + "given_name": "New", + "type": "personal", + } + } + ], + ) + + assert results["total_record_count"] == 1 + assert results["updated_record_count"] == 1 + assert results["failed_record_count"] == 0 + assert len(results["updated_records"]) == 1 + assert results["updated_records"][0]["id"] == record.id + assert results["updated_records"][0]["metadata_field"] == "metadata.creators" + assert results["updated_records"][0]["old_value"] == [ + { + "person_or_org": { + "name": "Creator, Test", + "family_name": "Creator", + "given_name": "Test", + "type": "personal", + } + } + ] + assert results["updated_records"][0]["new_value"] == [ + { + "person_or_org": { + "name": "New Creator", + "family_name": "Creator", + "given_name": "New", + "type": "personal", + } + } + ] + assert len(results["errors"]) == 0 + + updated_record = current_rdm_records_service.read(system_identity, record.id) + assert updated_record.data["metadata"]["creators"] == [ + { + "person_or_org": { + "name": "Creator, New", + "family_name": "Creator", + "given_name": "New", + "type": "personal", + } + } + ] + + +def test_update_community_records_metadata_nonexistent_community( + running_app, + db, + search_clear, +): + """Test bulk updating metadata for a nonexistent community.""" + # Try to update records in a nonexistent community + with pytest.raises(ValueError): + update_community_records_metadata( + community_id="nonexistent-community", + metadata_field="metadata.publisher", + new_value="Updated publisher", + ) diff --git a/tests/cli/test_kcworks_cli.py b/tests/cli/test_kcworks_cli.py new file mode 100644 index 000000000..a401fa8dc --- /dev/null +++ b/tests/cli/test_kcworks_cli.py @@ -0,0 +1,231 @@ +"""Tests for KCWorks CLI commands.""" + +import pytest +from invenio_access.permissions import 
system_identity +from invenio_rdm_records.proxies import current_rdm_records_service +from kcworks.services.records.cli import kcworks_records + + +@pytest.fixture(scope="module") +def cli_runner(base_app): + """Create a CLI runner for testing a CLI command.""" + + def cli_invoke(command, *args, input=None): + return base_app.test_cli_runner().invoke(command, args, input=input) + + return cli_invoke + + +def test_bulk_update_command( + running_app, + db, + minimal_published_record_factory, + minimal_community_factory, + search_clear, + celery_worker, + mock_send_remote_api_update_fixture, + cli_runner, +): + """Test the bulk-update command.""" + # Create a test community + community = minimal_community_factory( + metadata={"title": "Test Community"}, + slug="test-community", + ) + community_id = community.id + + # Create a test record in the community + record = minimal_published_record_factory( + metadata={ + "metadata": { + "resource_type": {"id": "textDocument-journalArticle"}, + "title": "Test Record", + "publisher": "Test Publisher", + "publication_date": "2025-01-01", + "creators": [ + { + "person_or_org": { + "name": "Test Creator", + "family_name": "Creator", + "given_name": "Test", + "type": "personal", + } + } + ], + }, + "files": { + "enabled": False, + }, + }, + community_list=[community_id], + ) + + # Test the command + result = cli_runner( + kcworks_records, + "bulk-update", + community_id, + "metadata.title", + '"Updated Title"', + ) + + assert result.exit_code == 0 + assert "Total records found: 1" in result.output + assert "Successfully updated: 1" in result.output + assert "Failed to update: 0" in result.output + + # Verify the record was updated + updated_record = current_rdm_records_service.read(system_identity, record.id) + assert updated_record.data["metadata"]["title"] == "Updated Title" + + +def test_bulk_update_nested_field( + running_app, + db, + minimal_published_record_factory, + minimal_community_factory, + search_clear, + 
celery_worker, + mock_send_remote_api_update_fixture, + cli_runner, +): + """Test the bulk-update command with a nested field.""" + # Create a test community + community = minimal_community_factory( + metadata={"title": "Test Community"}, + slug="test-community", + ) + community_id = community.id + + # Create a test record in the community + record = minimal_published_record_factory( + metadata={ + "metadata": { + "resource_type": {"id": "textDocument-journalArticle"}, + "title": "Test Record", + "publisher": "Test Publisher", + "publication_date": "2025-01-01", + "creators": [ + { + "person_or_org": { + "name": "Test Creator", + "family_name": "Creator", + "given_name": "Test", + "type": "personal", + } + } + ], + }, + "files": { + "enabled": False, + }, + }, + community_list=[community_id], + ) + + # Test the command with a nested field + result = cli_runner( + kcworks_records, + "bulk-update", + community_id, + "metadata.creators.0.person_or_org.family_name", + '"New"', + ) + + assert result.exit_code == 0 + assert "Total records found: 1" in result.output + assert "Successfully updated: 1" in result.output + assert "Failed to update: 0" in result.output + + # Verify the record was updated + updated_record = current_rdm_records_service.read(system_identity, record.id) + assert ( + updated_record.data["metadata"]["creators"][0]["person_or_org"]["family_name"] + == "New" + ) + assert ( + updated_record.data["metadata"]["creators"][0]["person_or_org"]["name"] + == "New, Test" + ) + + +def test_bulk_update_plain_string( + running_app, + db, + minimal_published_record_factory, + minimal_community_factory, + search_clear, + celery_worker, + mock_send_remote_api_update_fixture, + cli_runner, +): + """Test the bulk-update command with a plain string value.""" + # Create a test community + community = minimal_community_factory( + metadata={"title": "Test Community"}, + slug="test-community", + ) + community_id = community.id + + # Create a test record in the community + 
record = minimal_published_record_factory( + metadata={ + "metadata": { + "resource_type": {"id": "textDocument-journalArticle"}, + "title": "Test Record", + "publisher": "Test Publisher", + "publication_date": "2025-01-01", + "creators": [ + { + "person_or_org": { + "name": "Test Creator", + "family_name": "Creator", + "given_name": "Test", + "type": "personal", + } + } + ], + }, + "files": { + "enabled": False, + }, + }, + community_list=[community_id], + ) + + # Test the command with a plain string + result = cli_runner( + kcworks_records, + "bulk-update", + community_id, + "metadata.title", + "Plain String Title", + ) + + assert result.exit_code == 0 + assert "Total records found: 1" in result.output + assert "Successfully updated: 1" in result.output + assert "Failed to update: 0" in result.output + + # Verify the record was updated + updated_record = current_rdm_records_service.read(system_identity, record.id) + assert updated_record.data["metadata"]["title"] == "Plain String Title" + + +def test_bulk_update_nonexistent_community( + running_app, + db, + search_clear, + cli_runner, +): + """Test the bulk-update command with nonexistent community.""" + result = cli_runner( + kcworks_records, + "bulk-update", + "nonexistent", + "metadata.title", + "Updated Title", + ) + + assert result.exit_code == 0 # Command executes but prints error + assert "Community nonexistent not found" in result.output diff --git a/tests/fixtures/records.py b/tests/fixtures/records.py index 8165a58aa..d17217ad1 100644 --- a/tests/fixtures/records.py +++ b/tests/fixtures/records.py @@ -8,9 +8,12 @@ """Test fixtures for records.""" import copy +import mimetypes import re from datetime import timedelta +from pathlib import Path from pprint import pformat +from tempfile import SpooledTemporaryFile from typing import Any import arrow @@ -21,6 +24,8 @@ from invenio_access.permissions import system_identity from invenio_accounts.proxies import current_accounts from invenio_rdm_records.proxies 
import current_rdm_records_service as records_service +from invenio_record_importer_kcworks.services.files import FilesHelper +from invenio_record_importer_kcworks.types import FileData from invenio_record_importer_kcworks.utils.utils import replace_value_in_nested_dict from invenio_records_resources.services.records.results import RecordItem @@ -56,6 +61,7 @@ def _factory( identity: Identity | None = None, community_list: list[str] | None = None, set_default: bool = False, + file_paths: list[str] | None = None, **kwargs: Any, ) -> RecordItem: """Create a minimal published record. @@ -69,6 +75,8 @@ def _factory( the record (if any). Must be community UUIDs rather than slugs. set_default (bool, optional): If True, the first community in the list will be set as the default community for the record. + file_paths (list[str], optional): A list of strings representing the paths + to the files to add to the record. Returns: The published record as a service layer RecordItem. @@ -76,6 +84,44 @@ def _factory( input_metadata = metadata or record_metadata().metadata_in identity = identity or system_identity draft = records_service.create(identity, input_metadata) + + if file_paths: + current_app.logger.error(f"Adding files to record {draft.id}") + files_helper = FilesHelper(is_draft=True) + file_objects = [] + for file_path in file_paths: + current_app.logger.error( + f"Adding file {file_path} to record {draft.id}" + ) + with open(file_path, "rb") as f: + file_content = f.read() + # Create a SpooledTemporaryFile and write the file content to it + spooled_file = SpooledTemporaryFile() + with open(file_path, "rb") as f: + spooled_file.write(file_content) + spooled_file.seek(0) # Reset file pointer to beginning + + current_app.logger.error(f"Opening file {file_path}") + mimetype = mimetypes.guess_type(file_path)[0] or "application/pdf" + current_app.logger.error(f"Mimetype: {mimetype}") + file_object = FileData( + filename=Path(file_path).name, + content_type=mimetype, + 
mimetype=mimetype, + mimetype_params={}, + stream=spooled_file, + ) + current_app.logger.error(f"File object: {file_object}") + file_objects.append(file_object) + + file_result = files_helper.handle_record_files( + metadata=draft.to_dict(), + file_data=input_metadata.get("files", {}).get("entries", {}), + existing_record=None, + files=file_objects, + ) + current_app.logger.error(f"File result: {pformat(file_result)}") + published = records_service.publish(identity, draft.id) if community_list: record = published._record diff --git a/tests/fixtures/search_provisioning.py b/tests/fixtures/search_provisioning.py index 32cd583ba..da098643a 100644 --- a/tests/fixtures/search_provisioning.py +++ b/tests/fixtures/search_provisioning.py @@ -9,6 +9,7 @@ import pytest from celery import shared_task +import arrow @shared_task(bind=True) @@ -31,7 +32,15 @@ def mock_send_remote_api_update( ): """Mock the send_remote_api_update task.""" record = record or {} - pass + if service_type == "rdm_record" and service_method == "publish": + # Simulate a successful remote API update + record["custom_fields"] = record.get("custom_fields", {}) + record["custom_fields"]["kcr:commons_search_recid"] = "2E9SqY0Bdd2QL-HGeUuA" + record["custom_fields"]["kcr:commons_search_updated"] = arrow.utcnow().format( + "YYYY-MM-DDTHH:mm:ssZ" + ) + # Return a tuple of (response_text, callback_result) to match the real function + return "OK", None @pytest.fixture diff --git a/translations/en/LC_MESSAGES/messages.po b/translations/en/LC_MESSAGES/messages.po index 896f7dc3f..df017369b 100644 --- a/translations/en/LC_MESSAGES/messages.po +++ b/translations/en/LC_MESSAGES/messages.po @@ -7,7 +7,7 @@ msgid "" msgstr "" "Project-Id-Version: PROJECT VERSION\n" "Report-Msgid-Bugs-To: scottia4@msu.edu\n" -"POT-Creation-Date: 2025-05-06 17:40-0400\n" +"POT-Creation-Date: 2025-05-28 19:13-0400\n" "PO-Revision-Date: 2025-04-24 10:38-0400\n" "Last-Translator: FULL NAME \n" "Language: en\n" @@ -322,6 +322,7 @@ msgstr "" 
#: /Users/ianscott/Development/knowledge-commons-works/invenio.cfg:797 #: /Users/ianscott/Development/knowledge-commons-works/site/kcworks/dependencies/invenio-modular-detail-page/build/lib/invenio_modular_detail_page/config.py:134 #: /Users/ianscott/Development/knowledge-commons-works/site/kcworks/dependencies/invenio-modular-detail-page/invenio_modular_detail_page/config.py:134 +#: /Users/ianscott/Development/knowledge-commons-works/site/kcworks/dependencies/invenio-stats-dashboard/invenio_stats_dashboard/config.py:61 msgid "Content" msgstr "" @@ -1343,7 +1344,7 @@ msgstr "" #. NOTE: This is a note to a translator. #: /Users/ianscott/Development/knowledge-commons-works/site/kcworks/dependencies/invenio-modular-deposit-form/build/lib/invenio_modular_deposit_form/ext.py:39 -#: /Users/ianscott/Development/knowledge-commons-works/site/kcworks/dependencies/invenio-modular-deposit-form/invenio_modular_deposit_form/ext.py:39 +#: /Users/ianscott/Development/knowledge-commons-works/site/kcworks/dependencies/invenio-modular-deposit-form/invenio_modular_deposit_form/ext.py:38 msgid "A translation string" msgstr "" @@ -3060,6 +3061,49 @@ msgstr "" msgid "Invalid search field: {field_name}." msgstr "" +#: /Users/ianscott/Development/knowledge-commons-works/site/kcworks/dependencies/invenio-stats-dashboard/invenio_stats_dashboard/config.py:31 +#: /Users/ianscott/Development/knowledge-commons-works/site/kcworks/dependencies/invenio-stats-dashboard/invenio_stats_dashboard/config.py:39 +msgid "Statistics" +msgstr "" + +#: /Users/ianscott/Development/knowledge-commons-works/site/kcworks/dependencies/invenio-stats-dashboard/invenio_stats_dashboard/config.py:32 +msgid "This is the global stats dashboard." +msgstr "" + +#: /Users/ianscott/Development/knowledge-commons-works/site/kcworks/dependencies/invenio-stats-dashboard/invenio_stats_dashboard/config.py:40 +msgid "This is the community stats dashboard." 
+msgstr "" + +#: /Users/ianscott/Development/knowledge-commons-works/site/kcworks/dependencies/invenio-stats-dashboard/invenio_stats_dashboard/config.py:63 +#: /Users/ianscott/Development/knowledge-commons-works/site/kcworks/dependencies/invenio-stats-dashboard/invenio_stats_dashboard/config.py:204 +msgid "Cumulative totals as of" +msgstr "" + +#: /Users/ianscott/Development/knowledge-commons-works/site/kcworks/dependencies/invenio-stats-dashboard/invenio_stats_dashboard/config.py:159 +msgid "Contributions" +msgstr "" + +#: /Users/ianscott/Development/knowledge-commons-works/site/kcworks/dependencies/invenio-stats-dashboard/invenio_stats_dashboard/config.py:161 +#: /Users/ianscott/Development/knowledge-commons-works/site/kcworks/dependencies/invenio-stats-dashboard/invenio_stats_dashboard/config.py:293 +msgid "Activity during" +msgstr "" + +#: /Users/ianscott/Development/knowledge-commons-works/site/kcworks/dependencies/invenio-stats-dashboard/invenio_stats_dashboard/config.py:203 +msgid "Traffic" +msgstr "" + +#: /Users/ianscott/Development/knowledge-commons-works/site/kcworks/dependencies/invenio-stats-dashboard/invenio_stats_dashboard/config.py:291 +msgid "Usage" +msgstr "" + +#: /Users/ianscott/Development/knowledge-commons-works/site/kcworks/dependencies/invenio-stats-dashboard/invenio_stats_dashboard/ext.py:48 +msgid "Stats" +msgstr "" + +#: /Users/ianscott/Development/knowledge-commons-works/site/kcworks/dependencies/invenio-stats-dashboard/invenio_stats_dashboard/templates/semantic-ui/invenio_stats_dashboard/stats_dashboard.html:8 +msgid "Stats Dashboard" +msgstr "" + #: /Users/ianscott/Development/knowledge-commons-works/site/kcworks/dependencies/invenio-vocabularies/invenio_vocabularies/contrib/awards/config.py:49 msgid "Funders" msgstr "" diff --git a/translations/messages.pot b/translations/messages.pot index 91e7c1a59..989c01d3a 100644 --- a/translations/messages.pot +++ b/translations/messages.pot @@ -8,7 +8,7 @@ msgid "" msgstr "" 
"Project-Id-Version: PROJECT VERSION\n" "Report-Msgid-Bugs-To: scottia4@msu.edu\n" -"POT-Creation-Date: 2025-05-06 17:40-0400\n" +"POT-Creation-Date: 2025-05-28 19:13-0400\n" "PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" "Last-Translator: FULL NAME \n" "Language-Team: LANGUAGE \n" @@ -321,6 +321,7 @@ msgstr "" #: /Users/ianscott/Development/knowledge-commons-works/invenio.cfg:797 #: /Users/ianscott/Development/knowledge-commons-works/site/kcworks/dependencies/invenio-modular-detail-page/build/lib/invenio_modular_detail_page/config.py:134 #: /Users/ianscott/Development/knowledge-commons-works/site/kcworks/dependencies/invenio-modular-detail-page/invenio_modular_detail_page/config.py:134 +#: /Users/ianscott/Development/knowledge-commons-works/site/kcworks/dependencies/invenio-stats-dashboard/invenio_stats_dashboard/config.py:61 msgid "Content" msgstr "" @@ -1342,7 +1343,7 @@ msgstr "" #. NOTE: This is a note to a translator. #: /Users/ianscott/Development/knowledge-commons-works/site/kcworks/dependencies/invenio-modular-deposit-form/build/lib/invenio_modular_deposit_form/ext.py:39 -#: /Users/ianscott/Development/knowledge-commons-works/site/kcworks/dependencies/invenio-modular-deposit-form/invenio_modular_deposit_form/ext.py:39 +#: /Users/ianscott/Development/knowledge-commons-works/site/kcworks/dependencies/invenio-modular-deposit-form/invenio_modular_deposit_form/ext.py:38 msgid "A translation string" msgstr "" @@ -3059,6 +3060,49 @@ msgstr "" msgid "Invalid search field: {field_name}." 
msgstr "" +#: /Users/ianscott/Development/knowledge-commons-works/site/kcworks/dependencies/invenio-stats-dashboard/invenio_stats_dashboard/config.py:31 +#: /Users/ianscott/Development/knowledge-commons-works/site/kcworks/dependencies/invenio-stats-dashboard/invenio_stats_dashboard/config.py:39 +msgid "Statistics" +msgstr "" + +#: /Users/ianscott/Development/knowledge-commons-works/site/kcworks/dependencies/invenio-stats-dashboard/invenio_stats_dashboard/config.py:32 +msgid "This is the global stats dashboard." +msgstr "" + +#: /Users/ianscott/Development/knowledge-commons-works/site/kcworks/dependencies/invenio-stats-dashboard/invenio_stats_dashboard/config.py:40 +msgid "This is the community stats dashboard." +msgstr "" + +#: /Users/ianscott/Development/knowledge-commons-works/site/kcworks/dependencies/invenio-stats-dashboard/invenio_stats_dashboard/config.py:63 +#: /Users/ianscott/Development/knowledge-commons-works/site/kcworks/dependencies/invenio-stats-dashboard/invenio_stats_dashboard/config.py:204 +msgid "Cumulative totals as of" +msgstr "" + +#: /Users/ianscott/Development/knowledge-commons-works/site/kcworks/dependencies/invenio-stats-dashboard/invenio_stats_dashboard/config.py:159 +msgid "Contributions" +msgstr "" + +#: /Users/ianscott/Development/knowledge-commons-works/site/kcworks/dependencies/invenio-stats-dashboard/invenio_stats_dashboard/config.py:161 +#: /Users/ianscott/Development/knowledge-commons-works/site/kcworks/dependencies/invenio-stats-dashboard/invenio_stats_dashboard/config.py:293 +msgid "Activity during" +msgstr "" + +#: /Users/ianscott/Development/knowledge-commons-works/site/kcworks/dependencies/invenio-stats-dashboard/invenio_stats_dashboard/config.py:203 +msgid "Traffic" +msgstr "" + +#: /Users/ianscott/Development/knowledge-commons-works/site/kcworks/dependencies/invenio-stats-dashboard/invenio_stats_dashboard/config.py:291 +msgid "Usage" +msgstr "" + +#: 
/Users/ianscott/Development/knowledge-commons-works/site/kcworks/dependencies/invenio-stats-dashboard/invenio_stats_dashboard/ext.py:48 +msgid "Stats" +msgstr "" + +#: /Users/ianscott/Development/knowledge-commons-works/site/kcworks/dependencies/invenio-stats-dashboard/invenio_stats_dashboard/templates/semantic-ui/invenio_stats_dashboard/stats_dashboard.html:8 +msgid "Stats Dashboard" +msgstr "" + #: /Users/ianscott/Development/knowledge-commons-works/site/kcworks/dependencies/invenio-vocabularies/invenio_vocabularies/contrib/awards/config.py:49 msgid "Funders" msgstr "" From bdce3407284e37753056b41c0708cdb44a722c4f Mon Sep 17 00:00:00 2001 From: Ian Scott Date: Thu, 29 May 2025 00:14:30 -0400 Subject: [PATCH 3/7] feature: Finished new bulk-update CLI command; fixes to test fixtures --- site/kcworks/cli.py | 2 +- .../services/records/bulk_operations.py | 6 +- site/kcworks/services/records/cli.py | 1 - tests/api/test_api_record_ops.py | 21 +---- tests/fixtures/records.py | 69 +++++++++-------- tests/fixtures/search_provisioning.py | 2 +- translations/en/LC_MESSAGES/messages.po | 76 ++++++++----------- translations/messages.pot | 46 +---------- 8 files changed, 76 insertions(+), 147 deletions(-) diff --git a/site/kcworks/cli.py b/site/kcworks/cli.py index dce308e2e..ceb724de9 100644 --- a/site/kcworks/cli.py +++ b/site/kcworks/cli.py @@ -20,13 +20,13 @@ import click from flask.cli import with_appcontext from invenio_search.cli import abort_if_false, search_version_check +from kcworks.services.records.cli import kcworks_records as records_command from kcworks.services.search.indices import delete_index from kcworks.services.users.cli import group_users as group_users_command from kcworks.services.users.cli import groups as groups_command from kcworks.services.users.cli import name_parts as name_parts_command from kcworks.services.users.cli import read as read_command from kcworks.services.users.cli import user_groups as user_groups_command -from 
kcworks.services.records.cli import kcworks_records as records_command UNMANAGED_INDICES = [ "kcworks-stats-record-view", diff --git a/site/kcworks/services/records/bulk_operations.py b/site/kcworks/services/records/bulk_operations.py index 8a83beb24..d58509176 100644 --- a/site/kcworks/services/records/bulk_operations.py +++ b/site/kcworks/services/records/bulk_operations.py @@ -15,6 +15,8 @@ class UpdateResult(TypedDict): + """Result report after updating a record during a bulk operation.""" + total_record_count: int updated_record_count: int failed_record_count: int @@ -55,8 +57,8 @@ def update_community_records_metadata( try: current_communities.service.read(system_identity, community_id) - except PIDDoesNotExistError: - raise ValueError(f"Community {community_id} not found") + except PIDDoesNotExistError as e: + raise ValueError(f"Community {community_id} not found") from e prefix = current_app.config.get("SEARCH_INDEX_PREFIX", "") search = Search(using=current_search_client, index=f"{prefix}rdmrecords-records") diff --git a/site/kcworks/services/records/cli.py b/site/kcworks/services/records/cli.py index dd428ed5e..a3def0ea4 100644 --- a/site/kcworks/services/records/cli.py +++ b/site/kcworks/services/records/cli.py @@ -5,7 +5,6 @@ import click from flask.cli import with_appcontext - from kcworks.services.records.bulk_operations import update_community_records_metadata diff --git a/tests/api/test_api_record_ops.py b/tests/api/test_api_record_ops.py index 6e59bce57..0ef0e3334 100644 --- a/tests/api/test_api_record_ops.py +++ b/tests/api/test_api_record_ops.py @@ -97,7 +97,6 @@ def test_draft_creation_api( assert response.status_code == 201 actual_draft = response.json - app.logger.debug(f"actual_draft: {pformat(actual_draft)}") if self.errors or "errors" in actual_draft.keys(): assert actual_draft["errors"] == self.errors @@ -184,9 +183,6 @@ def test_draft_creation_service( metadata = record_metadata(metadata_in=self.metadata_source, owner_id=user_id) result = 
minimal_draft_record_factory(metadata=metadata.metadata_in) actual_draft = result.to_dict() - running_app.app.logger.debug(f"actual_draft: {pformat(actual_draft)}") - running_app.app.logger.info(f"actual_draft: {pformat(actual_draft)}") - running_app.app.logger.warning(f"actual_draft: {pformat(actual_draft)}") assert metadata.compare_draft( actual_draft, by_api=False, skip_fields=self.skip_fields ) @@ -251,7 +247,6 @@ def test_record_publication_api( ): """Test that a user can publish a draft record via the API.""" app = running_app.app - metadata = TestRecordMetadata(app=app) u = user_factory( email=user_data_set["user1"]["email"], password="test", @@ -263,35 +258,27 @@ def test_record_publication_api( user = u.user token = u.allowed_token + metadata = TestRecordMetadata(app=app, owner_id=user.id) with app.test_client() as client: logged_in_client = client_with_login(client, user) - app.logger.error("Creating draft record...") response = logged_in_client.post( f"{app.config['SITE_API_URL']}/records", data=json.dumps(metadata.metadata_in), headers={**headers, "Authorization": f"Bearer {token}"}, ) - app.logger.error(f"Draft creation response: {pformat(response.json)}") assert response.status_code == 201 actual_draft = response.json actual_draft_id = actual_draft["id"] - app.logger.error(f"Draft ID: {actual_draft_id}") publish_response = logged_in_client.post( f"{app.config['SITE_API_URL']}/records/{actual_draft_id}/draft" "/actions/publish", headers={**headers, "Authorization": f"Bearer {token}"}, ) - app.logger.error(f"Publish response status: {publish_response.status_code}") - if publish_response.status_code != 202: - app.logger.error( - f"Publish response error: {pformat(publish_response.json)}" - ) assert publish_response.status_code == 202 actual_published = publish_response.json - app.logger.error(f"Published record: {pformat(actual_published)}") assert actual_published["id"] == actual_draft_id assert actual_published["is_published"] assert not 
actual_published["is_draft"] @@ -301,7 +288,7 @@ def test_record_publication_api( assert actual_published["status"] == "published" # Compare the published metadata with the expected metadata - metadata.compare_published(actual_published, by_api=True) + metadata.compare_published(actual_published, by_api=True, method="publish") def test_record_publication_service( @@ -375,7 +362,6 @@ def test_record_draft_update_api( actual_draft_id = actual_draft["id"] metadata.update_metadata({"metadata|title": "A Romans Story 2"}) - app.logger.debug(f"metadata.metadata_in: {pformat(metadata.metadata_in)}") update_response = logged_in_client.put( f"{app.config['SITE_API_URL']}/records/{actual_draft_id}/draft", data=json.dumps(metadata.metadata_in), @@ -459,7 +445,6 @@ def test_record_published_update_service( ) user = u.user identity = get_identity(user) - app.logger.error(f"identity: {pformat(identity)}") identity.provides.add(authenticated_user) metadata = record_metadata(owner_id=user.id) @@ -467,7 +452,6 @@ def test_record_published_update_service( record = minimal_published_record_factory( metadata=metadata.metadata_in, identity=identity ) - running_app.app.logger.error(f"record: {pformat(record.to_dict())}") record_id = record.id new_draft = records_service.edit(identity, record_id) @@ -845,7 +829,6 @@ def test_record_view_api( }, } ) - app.logger.debug(f"metadata.metadata_in: {pformat(metadata.metadata_in)}") metadata.compare_published(actual=record, by_api=True) assert record["revision_id"] == 3 diff --git a/tests/fixtures/records.py b/tests/fixtures/records.py index d17217ad1..a618d51c1 100644 --- a/tests/fixtures/records.py +++ b/tests/fixtures/records.py @@ -7,9 +7,9 @@ """Test fixtures for records.""" -import copy import mimetypes import re +from copy import deepcopy from datetime import timedelta from pathlib import Path from pprint import pformat @@ -86,13 +86,9 @@ def _factory( draft = records_service.create(identity, input_metadata) if file_paths: - 
current_app.logger.error(f"Adding files to record {draft.id}") files_helper = FilesHelper(is_draft=True) file_objects = [] for file_path in file_paths: - current_app.logger.error( - f"Adding file {file_path} to record {draft.id}" - ) with open(file_path, "rb") as f: file_content = f.read() # Create a SpooledTemporaryFile and write the file content to it @@ -101,9 +97,7 @@ def _factory( spooled_file.write(file_content) spooled_file.seek(0) # Reset file pointer to beginning - current_app.logger.error(f"Opening file {file_path}") mimetype = mimetypes.guess_type(file_path)[0] or "application/pdf" - current_app.logger.error(f"Mimetype: {mimetype}") file_object = FileData( filename=Path(file_path).name, content_type=mimetype, @@ -111,7 +105,6 @@ def _factory( mimetype_params={}, stream=spooled_file, ) - current_app.logger.error(f"File object: {file_object}") file_objects.append(file_object) file_result = files_helper.handle_record_files( @@ -120,7 +113,6 @@ def _factory( existing_record=None, files=file_objects, ) - current_app.logger.error(f"File result: {pformat(file_result)}") published = records_service.publish(identity, draft.id) if community_list: @@ -217,8 +209,8 @@ def my_test_function(record_metadata): # Compare actual metadata dictionaries with expected metadata dictionaries # with variations seen in REST API results. 
- test_metadata.compare_draft_via_api(my_draft_dict_to_test, by_api=True) - test_metadata.compare_published_via_api(my_published_dict_to_test, by_api=True) + test_metadata.compare_draft_via_api(my_draft_dict_to_test, by_api=True, method="publish") + test_metadata.compare_published_via_api(my_published_dict_to_test, by_api=True, method="publish") ``` The input metadata dictionary can include the distinctive content used in the @@ -297,7 +289,7 @@ def __init__( community_list = community_list or [] file_entries = file_entries or {} self.app = app - starting_metadata_in = copy.deepcopy(TestRecordMetadata.default_metadata_in) + starting_metadata_in = deepcopy(TestRecordMetadata.default_metadata_in) self._metadata_in: dict = metadata_in if metadata_in else starting_metadata_in self.community_list = community_list self.file_entries = file_entries @@ -419,7 +411,7 @@ def draft(self): Fields that can't be set before record creation: """ - metadata_out_draft = copy.deepcopy(self.metadata_in) + metadata_out_draft = deepcopy(self.metadata_in) if not metadata_out_draft.get("access", {}): metadata_out_draft["access"] = { "files": "public", @@ -437,7 +429,7 @@ def draft(self): if metadata_out_draft["metadata"].get("resource_type", {}): current_resource_type = [ t - for t in copy.deepcopy(RESOURCE_TYPES) + for t in deepcopy(RESOURCE_TYPES) if t["id"] == metadata_out_draft["metadata"]["resource_type"].get("id") ][0] metadata_out_draft["metadata"]["resource_type"]["title"] = ( @@ -467,7 +459,7 @@ def draft(self): "access": { "grants": [], "links": [], - "owned_by": {"user": str(self.owner_id)}, + "owned_by": {"user": str(self.owner_id)} if self.owner_id else None, "settings": { "accept_conditions_text": None, "allow_guest_requests": False, @@ -567,7 +559,8 @@ def published(self): Fields that can't be set before record creation: """ - metadata_out_published = copy.deepcopy(self.draft) + metadata_out_published = deepcopy(self.draft) + metadata_out_published["status"] = "published" 
metadata_out_published["is_draft"] = False metadata_out_published["is_published"] = True metadata_out_published["versions"] = { @@ -581,9 +574,12 @@ def published(self): current_accounts.datastore.get_user_by_email(owner["email"]) for owner in owners_in ] - metadata_out_published["parent"]["access"]["owned_by"] = ( - {"user": str(owner_users[0].id)} if owner_users else None - ) + if owner_users: + metadata_out_published["parent"]["access"]["owned_by"] = { + "user": str(owner_users[0].id) + } + else: + metadata_out_published["parent"]["access"]["owned_by"] = None if len(owner_users) > 1: metadata_out_published["parent"]["access"]["grants"] = [ { @@ -612,6 +608,7 @@ def compare_draft( expected: dict | None = None, skip_fields: list[str] | None = None, by_api: bool = False, + method: str = "read", now: Arrow | None = None, ) -> bool: """Compare the draft metadata with the expected metadata by assertion. @@ -637,6 +634,9 @@ def compare_draft( in the return value from the REST API. Otherwise the format expected will be that returned from the RDMRecordService method. Defaults to False. + method (str, optional): The method used to get the metadata, since some + fields are only present in the REST API in response to certain methods. + Defaults to read. now (Arrow, optional): The current time. Defaults to arrow.utcnow(). skip_fields (list[str], optional): A list of field paths that are expected to be missing from the actual metadata due to validation errors. 
@@ -663,20 +663,17 @@ def compare_draft( - expires_at """ app = self.app - expected = self.draft.copy() if not expected else expected + expected = deepcopy(self.draft) if not expected else expected for skip_field in skip_fields or []: print(f"skip_field: {skip_field}") expected = remove_value_by_path(expected, skip_field) now = now or arrow.utcnow() - app.logger.info(f"actual: {pformat(actual)}") - print(f"actual: {pformat(actual['pids'])}") - print(f"expected: {pformat(expected['pids'])}") # ensure the id is in the correct format assert re.match(r"^[a-z0-9]{5}-[a-z0-9]{5}$", actual["id"]) if by_api: - expected = self._as_via_api(expected, is_draft=True) + expected = self._as_via_api(expected, is_draft=True, method=method) else: expected["parent"]["access"]["owned_by"] = None # TODO: Why? expected["stats"] = None @@ -775,13 +772,15 @@ def compare_draft( return True - def _as_via_api(self, metadata_in: dict, is_draft: bool = False) -> dict: + def _as_via_api( + self, metadata_in: dict, is_draft: bool = False, method: str = "read" + ) -> dict: """Return the metadata as it appears in the REST API.""" - if not is_draft: + if not is_draft and method != "publish": metadata_in["parent"]["access"].pop("grants") metadata_in["parent"]["access"].pop("links") metadata_in["versions"].pop("is_latest_draft") - else: + elif is_draft: del metadata_in["stats"] return metadata_in @@ -790,6 +789,7 @@ def compare_published( actual: dict, expected: dict | None = None, by_api: bool = False, + method: str = "read", now: Arrow | None = None, ) -> bool: """Compare the actual and expected metadata dictionaries. @@ -808,6 +808,9 @@ def compare_published( expected (dict): The expected metadata dictionary. by_api (bool, optional): Whether to compare the metadata as it appears in the REST API. Defaults to False. + method (str, optional): The method used to get the metadata, since some + fields are only present in the REST API in response to certain methods. + Defaults to read. 
now (Arrow, optional): The current time. Defaults to arrow.utcnow(). Returns: @@ -819,11 +822,11 @@ def compare_published( the expected metadata dictionary. """ app = self.app - expected = self.published.copy() if not expected else expected + expected = deepcopy(self.published) if not expected else expected now = now or arrow.utcnow() if by_api: - expected = self._as_via_api(expected, is_draft=False) + expected = self._as_via_api(expected, is_draft=False, method=method) try: assert now - arrow.get(actual["created"]) < timedelta(seconds=7) assert actual["custom_fields"] == expected["custom_fields"] @@ -885,9 +888,9 @@ def compare_published( ) assert actual["metadata"]["title"] == expected["metadata"]["title"] - expected["parent"]["access"]["owned_by"] = ( - {"user": str(self.owner_id)} if self.owner_id else None - ) + # expected["parent"]["access"]["owned_by"] = ( + # {"user": str(self.owner_id)} if self.owner_id else None + # ) assert actual["parent"]["access"] == expected["parent"]["access"] if self.community_list: @@ -1026,7 +1029,7 @@ def __init__( file_entries=file_entries, owner_id=owner_id, ) - starting_metadata_in = TestRecordMetadata.default_metadata_in.copy() + starting_metadata_in = deepcopy(TestRecordMetadata.default_metadata_in) self._metadata_in = metadata_in if metadata_in else starting_metadata_in self.record_id = record_id self.file_entries = file_entries diff --git a/tests/fixtures/search_provisioning.py b/tests/fixtures/search_provisioning.py index da098643a..70448bb54 100644 --- a/tests/fixtures/search_provisioning.py +++ b/tests/fixtures/search_provisioning.py @@ -7,9 +7,9 @@ """Search provisioning related pytest fixtures for testing.""" +import arrow import pytest from celery import shared_task -import arrow @shared_task(bind=True) diff --git a/translations/en/LC_MESSAGES/messages.po b/translations/en/LC_MESSAGES/messages.po index df017369b..83f6818c0 100644 --- a/translations/en/LC_MESSAGES/messages.po +++ 
b/translations/en/LC_MESSAGES/messages.po @@ -7,7 +7,7 @@ msgid "" msgstr "" "Project-Id-Version: PROJECT VERSION\n" "Report-Msgid-Bugs-To: scottia4@msu.edu\n" -"POT-Creation-Date: 2025-05-28 19:13-0400\n" +"POT-Creation-Date: 2025-05-29 00:04-0400\n" "PO-Revision-Date: 2025-04-24 10:38-0400\n" "Last-Translator: FULL NAME \n" "Language: en\n" @@ -322,7 +322,6 @@ msgstr "" #: /Users/ianscott/Development/knowledge-commons-works/invenio.cfg:797 #: /Users/ianscott/Development/knowledge-commons-works/site/kcworks/dependencies/invenio-modular-detail-page/build/lib/invenio_modular_detail_page/config.py:134 #: /Users/ianscott/Development/knowledge-commons-works/site/kcworks/dependencies/invenio-modular-detail-page/invenio_modular_detail_page/config.py:134 -#: /Users/ianscott/Development/knowledge-commons-works/site/kcworks/dependencies/invenio-stats-dashboard/invenio_stats_dashboard/config.py:61 msgid "Content" msgstr "" @@ -3061,49 +3060,6 @@ msgstr "" msgid "Invalid search field: {field_name}." msgstr "" -#: /Users/ianscott/Development/knowledge-commons-works/site/kcworks/dependencies/invenio-stats-dashboard/invenio_stats_dashboard/config.py:31 -#: /Users/ianscott/Development/knowledge-commons-works/site/kcworks/dependencies/invenio-stats-dashboard/invenio_stats_dashboard/config.py:39 -msgid "Statistics" -msgstr "" - -#: /Users/ianscott/Development/knowledge-commons-works/site/kcworks/dependencies/invenio-stats-dashboard/invenio_stats_dashboard/config.py:32 -msgid "This is the global stats dashboard." -msgstr "" - -#: /Users/ianscott/Development/knowledge-commons-works/site/kcworks/dependencies/invenio-stats-dashboard/invenio_stats_dashboard/config.py:40 -msgid "This is the community stats dashboard." 
-msgstr "" - -#: /Users/ianscott/Development/knowledge-commons-works/site/kcworks/dependencies/invenio-stats-dashboard/invenio_stats_dashboard/config.py:63 -#: /Users/ianscott/Development/knowledge-commons-works/site/kcworks/dependencies/invenio-stats-dashboard/invenio_stats_dashboard/config.py:204 -msgid "Cumulative totals as of" -msgstr "" - -#: /Users/ianscott/Development/knowledge-commons-works/site/kcworks/dependencies/invenio-stats-dashboard/invenio_stats_dashboard/config.py:159 -msgid "Contributions" -msgstr "" - -#: /Users/ianscott/Development/knowledge-commons-works/site/kcworks/dependencies/invenio-stats-dashboard/invenio_stats_dashboard/config.py:161 -#: /Users/ianscott/Development/knowledge-commons-works/site/kcworks/dependencies/invenio-stats-dashboard/invenio_stats_dashboard/config.py:293 -msgid "Activity during" -msgstr "" - -#: /Users/ianscott/Development/knowledge-commons-works/site/kcworks/dependencies/invenio-stats-dashboard/invenio_stats_dashboard/config.py:203 -msgid "Traffic" -msgstr "" - -#: /Users/ianscott/Development/knowledge-commons-works/site/kcworks/dependencies/invenio-stats-dashboard/invenio_stats_dashboard/config.py:291 -msgid "Usage" -msgstr "" - -#: /Users/ianscott/Development/knowledge-commons-works/site/kcworks/dependencies/invenio-stats-dashboard/invenio_stats_dashboard/ext.py:48 -msgid "Stats" -msgstr "" - -#: /Users/ianscott/Development/knowledge-commons-works/site/kcworks/dependencies/invenio-stats-dashboard/invenio_stats_dashboard/templates/semantic-ui/invenio_stats_dashboard/stats_dashboard.html:8 -msgid "Stats Dashboard" -msgstr "" - #: /Users/ianscott/Development/knowledge-commons-works/site/kcworks/dependencies/invenio-vocabularies/invenio_vocabularies/contrib/awards/config.py:49 msgid "Funders" msgstr "" @@ -3729,3 +3685,33 @@ msgstr "" msgid "NEH Record ID" msgstr "" +#~ msgid "Statistics" +#~ msgstr "" + +#~ msgid "This is the global stats dashboard." +#~ msgstr "" + +#~ msgid "This is the community stats dashboard." 
+#~ msgstr "" + +#~ msgid "Cumulative totals as of" +#~ msgstr "" + +#~ msgid "Contributions" +#~ msgstr "" + +#~ msgid "Activity during" +#~ msgstr "" + +#~ msgid "Traffic" +#~ msgstr "" + +#~ msgid "Usage" +#~ msgstr "" + +#~ msgid "Stats" +#~ msgstr "" + +#~ msgid "Stats Dashboard" +#~ msgstr "" + diff --git a/translations/messages.pot b/translations/messages.pot index 989c01d3a..879506826 100644 --- a/translations/messages.pot +++ b/translations/messages.pot @@ -8,7 +8,7 @@ msgid "" msgstr "" "Project-Id-Version: PROJECT VERSION\n" "Report-Msgid-Bugs-To: scottia4@msu.edu\n" -"POT-Creation-Date: 2025-05-28 19:13-0400\n" +"POT-Creation-Date: 2025-05-29 00:04-0400\n" "PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" "Last-Translator: FULL NAME \n" "Language-Team: LANGUAGE \n" @@ -321,7 +321,6 @@ msgstr "" #: /Users/ianscott/Development/knowledge-commons-works/invenio.cfg:797 #: /Users/ianscott/Development/knowledge-commons-works/site/kcworks/dependencies/invenio-modular-detail-page/build/lib/invenio_modular_detail_page/config.py:134 #: /Users/ianscott/Development/knowledge-commons-works/site/kcworks/dependencies/invenio-modular-detail-page/invenio_modular_detail_page/config.py:134 -#: /Users/ianscott/Development/knowledge-commons-works/site/kcworks/dependencies/invenio-stats-dashboard/invenio_stats_dashboard/config.py:61 msgid "Content" msgstr "" @@ -3060,49 +3059,6 @@ msgstr "" msgid "Invalid search field: {field_name}." msgstr "" -#: /Users/ianscott/Development/knowledge-commons-works/site/kcworks/dependencies/invenio-stats-dashboard/invenio_stats_dashboard/config.py:31 -#: /Users/ianscott/Development/knowledge-commons-works/site/kcworks/dependencies/invenio-stats-dashboard/invenio_stats_dashboard/config.py:39 -msgid "Statistics" -msgstr "" - -#: /Users/ianscott/Development/knowledge-commons-works/site/kcworks/dependencies/invenio-stats-dashboard/invenio_stats_dashboard/config.py:32 -msgid "This is the global stats dashboard." 
-msgstr "" - -#: /Users/ianscott/Development/knowledge-commons-works/site/kcworks/dependencies/invenio-stats-dashboard/invenio_stats_dashboard/config.py:40 -msgid "This is the community stats dashboard." -msgstr "" - -#: /Users/ianscott/Development/knowledge-commons-works/site/kcworks/dependencies/invenio-stats-dashboard/invenio_stats_dashboard/config.py:63 -#: /Users/ianscott/Development/knowledge-commons-works/site/kcworks/dependencies/invenio-stats-dashboard/invenio_stats_dashboard/config.py:204 -msgid "Cumulative totals as of" -msgstr "" - -#: /Users/ianscott/Development/knowledge-commons-works/site/kcworks/dependencies/invenio-stats-dashboard/invenio_stats_dashboard/config.py:159 -msgid "Contributions" -msgstr "" - -#: /Users/ianscott/Development/knowledge-commons-works/site/kcworks/dependencies/invenio-stats-dashboard/invenio_stats_dashboard/config.py:161 -#: /Users/ianscott/Development/knowledge-commons-works/site/kcworks/dependencies/invenio-stats-dashboard/invenio_stats_dashboard/config.py:293 -msgid "Activity during" -msgstr "" - -#: /Users/ianscott/Development/knowledge-commons-works/site/kcworks/dependencies/invenio-stats-dashboard/invenio_stats_dashboard/config.py:203 -msgid "Traffic" -msgstr "" - -#: /Users/ianscott/Development/knowledge-commons-works/site/kcworks/dependencies/invenio-stats-dashboard/invenio_stats_dashboard/config.py:291 -msgid "Usage" -msgstr "" - -#: /Users/ianscott/Development/knowledge-commons-works/site/kcworks/dependencies/invenio-stats-dashboard/invenio_stats_dashboard/ext.py:48 -msgid "Stats" -msgstr "" - -#: /Users/ianscott/Development/knowledge-commons-works/site/kcworks/dependencies/invenio-stats-dashboard/invenio_stats_dashboard/templates/semantic-ui/invenio_stats_dashboard/stats_dashboard.html:8 -msgid "Stats Dashboard" -msgstr "" - #: /Users/ianscott/Development/knowledge-commons-works/site/kcworks/dependencies/invenio-vocabularies/invenio_vocabularies/contrib/awards/config.py:49 msgid "Funders" msgstr "" From 
281d3807008dcb8db0ce42ab733d3f4b0bca8544 Mon Sep 17 00:00:00 2001 From: Ian Scott Date: Thu, 29 May 2025 09:57:03 -0400 Subject: [PATCH 4/7] Merge dev/bulkedit and version update to 0.5.2 --- README.md | 2 +- docs/source/README.md | 2 +- docs/source/changelog.md | 4 ++++ docs/source/conf.py | 2 +- pyproject.toml | 2 +- site/kcworks/__init__.py | 2 +- 6 files changed, 9 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index 7ab183420..a3ec25c54 100644 --- a/README.md +++ b/README.md @@ -10,7 +10,7 @@ Knowledge Commons Works is a collaborative tool for storing and sharing academic research. It is part of Knowledge Commons and is built on an instance of the InvenioRDM repository system. -Version 0.5.1 +Version 0.5.2 ## Copyright diff --git a/docs/source/README.md b/docs/source/README.md index f2a904206..6a19e3c39 100644 --- a/docs/source/README.md +++ b/docs/source/README.md @@ -6,7 +6,7 @@ ![KCWorks logo](../../static/images/kc_works_logos/SVG/kc_works_logo_wordmark.svg) -Version 0.5.1 +Version 0.5.2 Knowledge Commons Works is a collaborative platform for storing and sharing academic research. It is part of [Knowledge Commons](https://about.hcommons.org/) and is built on an instance of the [InvenioRDM](https://inveniordm.docs.cern.ch/) repository system. diff --git a/docs/source/changelog.md b/docs/source/changelog.md index 634da8bd4..b94bc3fad 100644 --- a/docs/source/changelog.md +++ b/docs/source/changelog.md @@ -3,6 +3,10 @@ # Changes +## 0.5.2 (2025-05-29) + +- Added a new CLI command to allow admins to bulk update a single metadata field to a single new fixed value for every record in a collection. + ## 0.5.1 (2025-05-13) - Removed BETA status from the site. 
diff --git a/docs/source/conf.py b/docs/source/conf.py index c30141a64..ef7541ec8 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -9,7 +9,7 @@ project = "Knowledge Commons Works" copyright = "2025, Mesh Research" author = "Mesh Research" -release = "0.5.1" +release = "0.5.2" # -- General configuration --------------------------------------------------- # https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration diff --git a/pyproject.toml b/pyproject.toml index 7091ee3e7..b3e51fadf 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" [project] name = "kcworks" -version = "0.5.1" +version = "0.5.2" requires-python = ">=3.12" dependencies = [ "aiohttp>=3.11.15", diff --git a/site/kcworks/__init__.py b/site/kcworks/__init__.py index e7f7118d8..41186dad3 100644 --- a/site/kcworks/__init__.py +++ b/site/kcworks/__init__.py @@ -16,4 +16,4 @@ """KCWorks customizations to InvenioRDM.""" -__version__ = "0.5.1" +__version__ = "0.5.2" From dd03fd88b40719fcc4c14f7b8512b69d9482f94e Mon Sep 17 00:00:00 2001 From: Ian Scott Date: Thu, 29 May 2025 10:45:04 -0400 Subject: [PATCH 5/7] fix(testing): More minor tweaks to satisfy ruff checks during testing --- tests/api/test_api_record_ops.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/tests/api/test_api_record_ops.py b/tests/api/test_api_record_ops.py index 0ef0e3334..8eb0b341c 100644 --- a/tests/api/test_api_record_ops.py +++ b/tests/api/test_api_record_ops.py @@ -12,7 +12,6 @@ from collections.abc import Callable from datetime import timedelta from pathlib import Path -from pprint import pformat import arrow import pytest @@ -436,7 +435,6 @@ def test_record_published_update_service( mock_send_remote_api_update_fixture: Callable, ): """Test that a user can update a published record via the API.""" - app = running_app.app u = user_factory( email=user_data_set["user1"]["email"], password="test", From 
0f0e28e64c184cac09ab39938c15314d900a63ad Mon Sep 17 00:00:00 2001 From: Ian Scott Date: Thu, 29 May 2025 10:59:38 -0400 Subject: [PATCH 6/7] fix(testing): Still more minor tweaks to satisfy ruff checks during testing --- tests/fixtures/records.py | 7 ------- 1 file changed, 7 deletions(-) diff --git a/tests/fixtures/records.py b/tests/fixtures/records.py index a618d51c1..d1e6e4cef 100644 --- a/tests/fixtures/records.py +++ b/tests/fixtures/records.py @@ -107,13 +107,6 @@ def _factory( ) file_objects.append(file_object) - file_result = files_helper.handle_record_files( - metadata=draft.to_dict(), - file_data=input_metadata.get("files", {}).get("entries", {}), - existing_record=None, - files=file_objects, - ) - published = records_service.publish(identity, draft.id) if community_list: record = published._record From ecbe8528794e5e96f3ac0c07f441c8aea4ce8893 Mon Sep 17 00:00:00 2001 From: Ian Scott Date: Thu, 29 May 2025 11:24:24 -0400 Subject: [PATCH 7/7] fix(testing): Still more minor tweaks to satisfy ruff checks during testing --- tests/fixtures/records.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/tests/fixtures/records.py b/tests/fixtures/records.py index d1e6e4cef..6724b0c01 100644 --- a/tests/fixtures/records.py +++ b/tests/fixtures/records.py @@ -107,6 +107,13 @@ def _factory( ) file_objects.append(file_object) + files_helper.handle_record_files( + metadata=draft.to_dict(), + file_data=input_metadata.get("files", {}).get("entries", {}), + existing_record=None, + files=file_objects, + ) + published = records_service.publish(identity, draft.id) if community_list: record = published._record