Skip to content

Add new tests for update Datacite metadata celery task #11178

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 2 commits into
base: feature/verified-resource-linking
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
209 changes: 209 additions & 0 deletions tests/identifiers/test_datacite_verified_links.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,209 @@
import pytest
from unittest import mock

from osf.metadata.serializers.datacite.datacite_tree_walker import DataciteTreeWalker
from osf.metadata import gather

from osf_tests.factories import NodeFactory, UserFactory


@pytest.mark.django_db
class TestDataCiteVerifiedLinks:

@pytest.fixture
def node(self):
return NodeFactory(is_public=True)

@pytest.fixture
def basket(self, node):
from osf.metadata.osf_gathering import OsfFocus
focus = OsfFocus(node)
return gather.Basket(focus)

@pytest.fixture
def tree_walker(self, basket):
def mock_visit(parent, tag, text=None, attrib=None, is_list=False):
element = {'tag': tag, 'text': text, 'attrib': attrib or {}}
if is_list:
element['children'] = []
return element

walker = DataciteTreeWalker(basket, None, mock_visit)
walker.visit = mock_visit
return walker

@mock.patch('framework.sentry.log_message')
def test_valid_verified_links(self, mock_sentry_log, tree_walker, mock_gravy_valet_get_verified_links):
mock_gravy_valet_get_verified_links.return_value = [
{'target_url': 'https://example.com/dataset', 'resource_type': 'dataset'},
{'target_url': 'https://github.com/user/repo', 'resource_type': 'software'},
{'target_url': 'https://osf.io/abc123/', 'resource_type': 'text'}
]

parent_el = {'tag': 'resource', 'children': []}

tree_walker._visit_related_and_verified_links(parent_el)

mock_gravy_valet_get_verified_links.assert_called_once_with(tree_walker.basket.focus.dbmodel._id)

mock_sentry_log.assert_not_called()

@mock.patch('framework.sentry.log_message')
def test_missing_target_url(self, mock_sentry_log, tree_walker, mock_gravy_valet_get_verified_links):
mock_gravy_valet_get_verified_links.return_value = [
{'resource_type': 'dataset'},
{'target_url': 'https://example.com/valid', 'resource_type': 'software'} # Valid link
]

parent_el = {'tag': 'resource', 'children': []}
tree_walker._visit_related_and_verified_links(parent_el)

mock_sentry_log.assert_called_once()
call_args = mock_sentry_log.call_args[0]
assert 'Skipped items for node' in call_args[0]
assert 'Missing data: [link=None, type=dataset]' in call_args[0]

@mock.patch('framework.sentry.log_message')
def test_missing_resource_type(self, mock_sentry_log, tree_walker, mock_gravy_valet_get_verified_links):
mock_gravy_valet_get_verified_links.return_value = [
{'target_url': 'https://example.com/dataset'}, # Missing resource_type
{'target_url': 'https://example.com/valid', 'resource_type': 'software'} # Valid link
]

parent_el = {'tag': 'resource', 'children': []}
tree_walker._visit_related_and_verified_links(parent_el)

mock_sentry_log.assert_called_once()
call_args = mock_sentry_log.call_args[0]
assert 'Skipped items for node' in call_args[0]
assert 'Missing data: [link=https://example.com/dataset, type=None]' in call_args[0]

@mock.patch('framework.sentry.log_message')
def test_invalid_url_format(self, mock_sentry_log, tree_walker, mock_gravy_valet_get_verified_links):
mock_gravy_valet_get_verified_links.return_value = [
{'target_url': 'not-a-valid-url', 'resource_type': 'dataset'},
{'target_url': 'also.invalid', 'resource_type': 'software'},
{'target_url': 'https://example.com/valid', 'resource_type': 'text'}
]

parent_el = {'tag': 'resource', 'children': []}
tree_walker._visit_related_and_verified_links(parent_el)

mock_sentry_log.assert_called_once()
call_args = mock_sentry_log.call_args[0]
assert 'Skipped items for node' in call_args[0]
assert 'Invalid link: [link=not-a-valid-url, type=dataset]' in call_args[0]
assert 'Invalid link: [link=also.invalid, type=software]' in call_args[0]

@mock.patch('framework.sentry.log_message')
def test_multiple_issues_combined(self, mock_sentry_log, tree_walker, mock_gravy_valet_get_verified_links):
mock_gravy_valet_get_verified_links.return_value = [
{'resource_type': 'dataset'},
{'target_url': 'invalid-url', 'resource_type': 'software'},
{'target_url': 'https://example.com/missing-type'},
{'target_url': 'https://example.com/valid', 'resource_type': 'text'}
]

parent_el = {'tag': 'resource', 'children': []}
tree_walker._visit_related_and_verified_links(parent_el)

mock_sentry_log.assert_called_once()
call_args = mock_sentry_log.call_args[0]
log_message = call_args[0]

assert 'Skipped items for node' in log_message
assert 'Missing data: [link=None, type=dataset]' in log_message
assert 'Invalid link: [link=invalid-url, type=software]' in log_message
assert 'Missing data: [link=https://example.com/missing-type, type=None]' in log_message

@mock.patch('framework.sentry.log_message')
def test_empty_verified_links(self, mock_sentry_log, tree_walker, mock_gravy_valet_get_verified_links):
mock_gravy_valet_get_verified_links.return_value = []

parent_el = {'tag': 'resource', 'children': []}
tree_walker._visit_related_and_verified_links(parent_el)

mock_gravy_valet_get_verified_links.assert_called_once()

mock_sentry_log.assert_not_called()

@mock.patch('framework.sentry.log_message')
def test_resource_type_title_case(self, mock_sentry_log, tree_walker, mock_gravy_valet_get_verified_links):
mock_gravy_valet_get_verified_links.return_value = [
{'target_url': 'https://example.com/dataset', 'resource_type': 'dataset'},
{'target_url': 'https://example.com/software', 'resource_type': 'software'},
{'target_url': 'https://example.com/text', 'resource_type': 'text'}
]

parent_el = {'tag': 'resource', 'children': []}

visit_calls = []
original_visit = tree_walker.visit

def capture_visit(*args, **kwargs):
visit_calls.append((args, kwargs))
return original_visit(*args, **kwargs)

tree_walker.visit = capture_visit
tree_walker._visit_related_and_verified_links(parent_el)

related_identifier_calls = [
call for call in visit_calls
if len(call[0]) >= 2 and call[0][1] == 'relatedIdentifier'
]

assert len(related_identifier_calls) == 3

resource_types = [call[1]['attrib']['resourceTypeGeneral'] for call in related_identifier_calls]
assert 'Dataset' in resource_types
assert 'Software' in resource_types
assert 'Text' in resource_types

def test_non_abstract_node_skipped(self, tree_walker, mock_gravy_valet_get_verified_links):
from osf.metadata.osf_gathering import OsfFocus
user = UserFactory()
focus = OsfFocus(user)
tree_walker.basket = gather.Basket(focus)

parent_el = {'tag': 'resource', 'children': []}
tree_walker._visit_related_and_verified_links(parent_el)

mock_gravy_valet_get_verified_links.assert_not_called()

@mock.patch('framework.sentry.log_message')
def test_edge_case_empty_strings(self, mock_sentry_log, tree_walker, mock_gravy_valet_get_verified_links):
mock_gravy_valet_get_verified_links.return_value = [
{'target_url': '', 'resource_type': 'dataset'},
{'target_url': 'https://example.com/valid', 'resource_type': ''},
{'target_url': 'https://example.com/valid2', 'resource_type': 'software'}
]

parent_el = {'tag': 'resource', 'children': []}
tree_walker._visit_related_and_verified_links(parent_el)

mock_sentry_log.assert_called_once()
call_args = mock_sentry_log.call_args[0]
log_message = call_args[0]

assert 'Missing data: [link=, type=dataset]' in log_message
assert 'Missing data: [link=https://example.com/valid, type=]' in log_message

@mock.patch('framework.sentry.log_message')
def test_url_validation_edge_cases(self, mock_sentry_log, tree_walker, mock_gravy_valet_get_verified_links):
mock_gravy_valet_get_verified_links.return_value = [
{'target_url': 'http://example.com', 'resource_type': 'dataset'},
{'target_url': 'https://example.com', 'resource_type': 'software'},
{'target_url': 'ftp://example.com/file', 'resource_type': 'text'},
{'target_url': 'example.com', 'resource_type': 'dataset'},
{'target_url': 'www.example.com', 'resource_type': 'software'},
]

parent_el = {'tag': 'resource', 'children': []}
tree_walker._visit_related_and_verified_links(parent_el)

mock_sentry_log.assert_called_once()
call_args = mock_sentry_log.call_args[0]
log_message = call_args[0]

assert 'Invalid link: [link=example.com, type=dataset]' in log_message
assert 'Invalid link: [link=www.example.com, type=software]' in log_message
125 changes: 125 additions & 0 deletions tests/identifiers/test_tasks.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,125 @@
import pytest
from unittest import mock
from django.utils import timezone
from celery.exceptions import RetryTaskError

from osf_tests.factories import UserFactory, NodeFactory
from website.identifiers.tasks import task__update_verified_links
from website.identifiers.clients.datacite import DataCiteClient

pytestmark = pytest.mark.django_db

@pytest.fixture()
def user():
return UserFactory()

@pytest.fixture()
def node(user):
return NodeFactory(creator=user, is_public=True)

@pytest.fixture()
def node_with_doi(user):
node = NodeFactory(creator=user, is_public=True)
node.set_identifier_value('doi', '10.1234/test')
return node

@pytest.fixture()
def mock_datacite_client():
with mock.patch('website.identifiers.clients.datacite.DataCiteClient') as mock_client:
mock_instance = mock.Mock(spec=DataCiteClient)
mock_instance.update_identifier.return_value = {'doi': '10.1234/test'}
mock_client.return_value = mock_instance
yield mock_instance

@pytest.fixture(autouse=True)
def mock_get_doi_client(mock_datacite_client):
with mock.patch('osf.models.AbstractNode.get_doi_client', return_value=mock_datacite_client):
yield

@pytest.mark.enable_enqueue_task
class TestUpdateDOIMetadataWithVerifiedLinks:

def test_update_doi_metadata_success(self, node_with_doi, mock_datacite_client):
verified_links = {
'https://osf.io/': 'Text',
f'https://osf.io/{node_with_doi._id}/': 'Text'
}
node_with_doi.verified_links = verified_links
node_with_doi.save()

task__update_verified_links.delay(node_with_doi._id)

node_with_doi.reload()
assert node_with_doi.verified_links == verified_links
mock_datacite_client.update_identifier.assert_called_once_with(node_with_doi, 'doi')

def test_update_doi_metadata_no_doi_no_create(self, node, mock_datacite_client):
task__update_verified_links.delay(node._id)
mock_datacite_client.update_identifier.assert_not_called()


@mock.patch('framework.sentry.log_message')
@mock.patch('website.identifiers.tasks.task__update_verified_links.retry')
def test_update_doi_metadata_deleted_node(self, mock_retry, mock_sentry_log, node_with_doi, mock_datacite_client):
node_with_doi.is_deleted = True
node_with_doi.deleted = timezone.now()
node_with_doi.save()

mock_datacite_client.update_identifier.side_effect = Exception('Node is deleted')
mock_retry.side_effect = Exception('Retry prevented')

with pytest.raises(Exception, match='Retry prevented'):
task__update_verified_links.delay(node_with_doi._id)

mock_sentry_log.assert_called_with(
'Failed to update DOI metadata with verified links',
extra_data={'guid': node_with_doi._id, 'error': mock.ANY},
level=mock.ANY
)

@mock.patch('website.identifiers.tasks.task__update_verified_links.retry')
def test_update_doi_metadata_exception_retry(self, mock_retry, node_with_doi, mock_datacite_client):
mock_datacite_client.update_identifier.side_effect = Exception('Test error')
mock_retry.side_effect = RetryTaskError()

with pytest.raises(RetryTaskError):
task__update_verified_links.delay(node_with_doi._id)

mock_datacite_client.update_identifier.assert_called_once_with(node_with_doi, 'doi')
mock_retry.assert_called_once_with(exc=mock.ANY)

@mock.patch('framework.sentry.log_message')
def test_update_doi_metadata_success_log(self, mock_log, node_with_doi, mock_datacite_client):
task__update_verified_links.delay(node_with_doi._id)

mock_log.assert_any_call(
'DOI metadata with verified links updated for guid',
extra_data={'guid': node_with_doi._id},
level=mock.ANY
)
mock_datacite_client.update_identifier.assert_called_once_with(node_with_doi, 'doi')

def test_update_doi_metadata_with_multiple_verified_links(self, node_with_doi, mock_datacite_client):
verified_links = {
'https://osf.io/': 'Text',
f'https://osf.io/{node_with_doi._id}/': 'Text',
'https://example.com': 'Text',
'https://test.org': 'Text'
}
node_with_doi.verified_links = verified_links
node_with_doi.save()

task__update_verified_links.delay(node_with_doi._id)

node_with_doi.reload()
assert node_with_doi.verified_links == verified_links
mock_datacite_client.update_identifier.assert_called_once_with(node_with_doi, 'doi')

def test_update_doi_metadata_private_node(self, node_with_doi, mock_datacite_client):
node_with_doi.is_public = False
node_with_doi.save()
mock_datacite_client.reset_mock()

task__update_verified_links.delay(node_with_doi._id)

mock_datacite_client.update_identifier.assert_called_once_with(node_with_doi, 'doi')
7 changes: 6 additions & 1 deletion website/identifiers/tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,12 @@ def task__update_verified_links(self, target_guid):
try:
target_object.request_identifier_update(category='doi')
logger.debug(f'DOI metadata for guid with verified links updated: [guid={target_guid}]')
except GVException as e:
except Exception as e:
sentry.log_message(
'Failed to update DOI metadata with verified links',
extra_data={'guid': target_guid, 'error': str(e)},
level=logging.ERROR
)
logger.error(f'DOI metadata for guid with verified links failed to update: [guid={target_guid}]')
raise self.retry(exc=e)

Expand Down
Loading