Skip to content

[ENG-8145] [ENG-8147] Manual DOI and GUID for Preprints & Registrations - BE #11174

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
29 commits
Select commit Hold shift + click to select a range
d0c8f6d
manual guid temp commit
cslzchen May 30, 2025
9329cec
manual guid for preprint temp commit
cslzchen May 31, 2025
3a98bf3
Remove debugging logs
cslzchen May 31, 2025
5f92d12
Further improvements
cslzchen May 31, 2025
27fa818
Redo migrations
cslzchen Jun 1, 2025
5eb0beb
hmm ... fix another register_node
cslzchen Jun 1, 2025
0930675
Fix more tests
cslzchen Jun 1, 2025
220b841
Make sure manually assigned guid are valid; if not raise exceptions
cslzchen Jun 2, 2025
f3866c8
Add manual_guid to both Preprint and Registration serializers
cslzchen Jun 2, 2025
d2ccef8
added waffle flags
opaduchak Jun 3, 2025
2fa471a
added manual doi setting
opaduchak Jun 2, 2025
c911a8e
fixed waffle flags
opaduchak Jun 3, 2025
7fd10c5
Fix serializer tests
cslzchen Jun 3, 2025
0b9dc15
Fix sentry logs for GV request
cslzchen Jun 3, 2025
7327abd
Use helper _clear_cached_guid and rework ensure_guid
cslzchen Jun 3, 2025
251ad8f
fixed unit tests
opaduchak Jun 3, 2025
80e3444
Respond to CR
cslzchen Jun 4, 2025
c2bda56
Fix manual guid validation check
cslzchen Jun 4, 2025
03bf6c1
Merge pull request #11160 from cslzchen/hotfix/manual-guid-poc
cslzchen Jun 4, 2025
f87eedb
Merge branch 'feature/manual-guid-and-doi-assignment' into fix/ENG-8145
cslzchen Jun 4, 2025
7f4adf4
Respond to CR + Small refactor to sync both guid and doi work
cslzchen Jun 4, 2025
7c9f470
Merge pull request #11162 from opaduchak/fix/ENG-8145
cslzchen Jun 4, 2025
d9bae22
Update manual DOI assignment for Preprints
cslzchen Jun 5, 2025
285d047
Merge pull request #11172 from cslzchen/feature/update-manual-doi-for…
cslzchen Jun 5, 2025
c46e21b
Revert unnecessary changes in generate_guid()
cslzchen Jun 5, 2025
b0e00ce
added manual guid unittests
opaduchak Jun 9, 2025
990ff3e
fixed test_registration_draft_must_be_draft_of_current_node not being…
opaduchak Jun 10, 2025
2b09d94
Allow blank value when manually setting GUID and DOI
cslzchen Jun 10, 2025
bb62bed
Merge pull request #11175 from opaduchak/feature/manual-guid-unittests
cslzchen Jun 11, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 13 additions & 2 deletions api/preprints/serializers.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,9 @@
from api.institutions.utils import update_institutions_if_user_associated
from api.preprints.fields import DOIField
from api.taxonomies.serializers import TaxonomizableSerializerMixin
from api.waffle.utils import flag_is_active
from framework.exceptions import PermissionsError, UnpublishedPendingPreprintVersionExists
from osf import features
from website.project import signals as project_signals
from osf.exceptions import NodeStateError, PreprintStateError
from osf.models import (
Expand Down Expand Up @@ -499,16 +501,25 @@ class PreprintCreateSerializer(PreprintSerializer):
# Overrides PreprintSerializer to make id nullable, adds `create`
# TODO: add better Docstrings
id = IDField(source='_id', required=False, allow_null=True)
manual_guid = ser.CharField(write_only=True, required=False, allow_null=True, allow_blank=True)
manual_doi = ser.CharField(write_only=True, required=False, allow_null=True, allow_blank=True)

def create(self, validated_data):

creator = self.context['request'].user
provider = validated_data.pop('provider', None)
if not provider:
raise exceptions.ValidationError(detail='You must specify a valid provider to create a preprint.')

title = validated_data.pop('title')
description = validated_data.pop('description', '')
preprint = Preprint.create(provider=provider, title=title, creator=creator, description=description)

# For manual GUID and DOI assignment during creation for privileged users
manual_guid = validated_data.pop('manual_guid', None)
manual_doi = validated_data.pop('manual_doi', None)
if manual_doi and not flag_is_active(self.context['request'], features.MANUAL_DOI_AND_GUID):
raise exceptions.ValidationError(detail='Manual DOI assignment is not allowed.')

preprint = Preprint.create(provider=provider, title=title, creator=creator, description=description, manual_guid=manual_guid, manual_doi=manual_doi)

return self.update(preprint, validated_data)

Expand Down
16 changes: 15 additions & 1 deletion api/registrations/serializers.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
import pytz
import json

from api.waffle.utils import flag_is_active
from osf import features
from website.archiver.utils import normalize_unicode_filenames

from packaging.version import Version
Expand Down Expand Up @@ -733,6 +736,10 @@ def __init__(self, *args, **kwargs):
else:
self.fields['draft_registration'] = ser.CharField(write_only=True)

# For manual GUID and DOI assignment during creation for privileged users
manual_guid = ser.CharField(write_only=True, required=False, allow_null=True, allow_blank=True)
manual_doi = ser.CharField(write_only=True, required=False, allow_null=True, allow_blank=True)

# For newer versions
embargo_end_date = VersionedDateTimeField(write_only=True, allow_null=True, default=None)
included_node_ids = ser.ListField(write_only=True, required=False)
Expand Down Expand Up @@ -786,6 +793,8 @@ def get_children_by_version(self, validated_data):
return validated_data.get('children', [])

def create(self, validated_data):

manual_guid = validated_data.pop('manual_guid', None)
auth = get_user_auth(self.context['request'])
draft = validated_data.pop('draft', None)
registration_choice = self.get_registration_choice_by_version(validated_data)
Expand All @@ -810,7 +819,7 @@ def create(self, validated_data):
)

try:
registration = draft.register(auth, save=True, child_ids=children)
registration = draft.register(auth, save=True, child_ids=children, manual_guid=manual_guid)
except NodeStateError as err:
raise exceptions.ValidationError(err)

Expand All @@ -823,6 +832,11 @@ def create(self, validated_data):
except ValidationError as err:
raise exceptions.ValidationError(err.message)
else:
manual_doi = validated_data.pop('manual_doi', None)
if manual_doi:
if not flag_is_active(self.context['request'], features.MANUAL_DOI_AND_GUID):
raise exceptions.ValidationError(detail='Manual DOI assignment is not allowed.')
registration.set_identifier_value('doi', manual_doi)
try:
registration.require_approval(auth.user)
except NodeStateError as err:
Expand Down
28 changes: 27 additions & 1 deletion api_tests/preprints/views/test_preprint_list.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@

import pytest
from django.utils import timezone
from waffle.testutils import override_switch
from waffle.testutils import override_switch, override_flag

from addons.github.models import GithubFile
from api.base.settings.defaults import API_BASE
Expand Down Expand Up @@ -351,6 +351,8 @@ def setUp(self):

self.user_two = AuthUserFactory()
self.url = f'/{API_BASE}preprints/'
self.manual_guid = 'abcde'
self.manual_doi = '10.70102/FK2osf.io/abcde'

def publish_preprint(self, preprint, user, expect_errors=False):
preprint_file = test_utils.create_test_preprint_file(preprint, user, 'coffee_manuscript.pdf')
Expand All @@ -362,6 +364,30 @@ def publish_preprint(self, preprint, user, expect_errors=False):
)
return res

@property
def manual_guid_payload(self):
    """Extra create-payload attributes carrying this test case's manual DOI and GUID."""
    extra_attrs = {}
    extra_attrs['manual_doi'] = self.manual_doi
    extra_attrs['manual_guid'] = self.manual_guid
    return extra_attrs

def test_fail_create_prerprint_with_manual_guid(self):
    """A create request carrying manual GUID/DOI attributes must be rejected with 400.

    The MANUAL_DOI_AND_GUID flag is deliberately NOT overridden here, so the
    request is made with whatever the flag's default state is.
    """
    payload = build_preprint_create_payload(
        self.public_project._id,
        self.provider._id,
        attrs=self.manual_guid_payload,
    )
    # expect_errors=True lets the test client return the error response instead of raising.
    res = self.app.post_json_api(self.url, payload, auth=self.user.auth, expect_errors=True)
    assert res.status_code == 400

def test_create_preprint_with_manual_guid(self):
# Happy path: create a preprint with manual GUID/DOI attributes while the
# MANUAL_DOI_AND_GUID flag is overridden to True, then verify both were applied.
public_project_payload = build_preprint_create_payload(self.public_project._id, self.provider._id, attrs=self.manual_guid_payload)
# NOTE(review): indentation was lost in this view, so it is unclear how many of the
# following statements belong to the `with` body — confirm against the real file.
with override_flag(features.MANUAL_DOI_AND_GUID, True):
res = self.app.post_json_api(self.url, public_project_payload, auth=self.user.auth, )
data = res.json['data']
assert res.status_code == 201
# The returned id is expected to be the manual GUID with a `_v1` suffix.
assert data['id'] == f'{self.manual_guid}_v1', 'manual guid was not assigned'
# Follow the identifiers relationship link from the create response.
identifiers_response = self.app.get(data['relationships']['identifiers']['links']['related']['href'], auth=self.user.auth)
assert identifiers_response.status_code == 200
# Only the first identifier in the list is compared against the manual DOI.
assert identifiers_response.json['data'][0]['attributes']['value'] == self.manual_doi

def test_create_preprint_with_supplemental_public_project(self):
public_project_payload = build_preprint_create_payload(self.public_project._id, self.provider._id)

Expand Down
2 changes: 2 additions & 0 deletions api_tests/registrations/views/test_registration_detail.py
Original file line number Diff line number Diff line change
Expand Up @@ -593,6 +593,7 @@ def test_registration_fields_are_read_only(self):
'custom_citation',
'category',
'provider_specific_metadata',
'manual_guid',
]
for field in RegistrationSerializer._declared_fields:
reg_field = RegistrationSerializer._declared_fields[field]
Expand All @@ -619,6 +620,7 @@ def test_registration_detail_fields_are_read_only(self):
'custom_citation',
'category',
'provider_specific_metadata',
'manual_guid',
]
for field in RegistrationDetailSerializer._declared_fields:
reg_field = RegistrationSerializer._declared_fields[field]
Expand Down
38 changes: 38 additions & 0 deletions api_tests/registrations/views/test_registration_list.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,13 +5,16 @@

from urllib.parse import urljoin, urlparse

from waffle import testutils

from api.base.settings.defaults import API_BASE
from api.base.versioning import CREATE_REGISTRATION_FIELD_CHANGE_VERSION
from api_tests.nodes.views.test_node_draft_registration_list import AbstractDraftRegistrationTestCase
from api_tests.subjects.mixins import SubjectsFilterMixin
from api_tests.registrations.filters.test_filters import RegistrationListFilteringMixin
from api_tests.utils import create_test_file
from framework.auth.core import Auth
from osf import features
from osf.models import RegistrationSchema, Registration
from osf_tests.factories import (
EmbargoFactory,
Expand Down Expand Up @@ -1559,6 +1562,41 @@ def test_registration_draft_must_be_draft_of_current_node(
self, mock_enqueue, app, user, schema, url_registrations_ver):
# Overrides TestNodeRegistrationCreate - node is not in URL in this workflow
return
@pytest.fixture
def manual_guid(self):
    """GUID value the manual-assignment tests ask the API to use."""
    guid_value = 'abcde'
    return guid_value

@pytest.fixture
def manual_doi(self):
    """DOI value the manual-assignment tests ask the API to use."""
    doi_value = '10.70102/FK2osf.io/abcde'
    return doi_value

@pytest.fixture
def enable_flag(self):
    """Keep the MANUAL_DOI_AND_GUID flag overridden to True for the duration of the test."""
    flag_override = testutils.override_flag(features.MANUAL_DOI_AND_GUID, True)
    with flag_override:
        # Yield inside the context manager so the override stays active while the test runs.
        yield

@pytest.fixture
def manual_guid_payload(self, payload, manual_guid, manual_doi):
    """Return the ``payload`` fixture with manual DOI and GUID attributes merged in.

    The merge happens in place on ``payload['data']['attributes']``; the same
    ``payload`` object is returned.
    """
    manual_attrs = {'manual_doi': manual_doi, 'manual_guid': manual_guid}
    payload['data']['attributes'] |= manual_attrs
    return payload

def test_fail_create_registration_with_manual_guid(self, app, user, schema, url_registrations, manual_guid_payload, manual_guid, manual_doi):
    """A create request carrying manual GUID/DOI attributes must be rejected with 400.

    This test does not request the ``enable_flag`` fixture, so the
    MANUAL_DOI_AND_GUID flag is left in its default state.
    """
    # expect_errors=True lets the test client return the error response instead of raising.
    res = app.post_json_api(url_registrations, manual_guid_payload, auth=user.auth, expect_errors=True)
    assert res.status_code == 400

def test_create_registration_with_manual_guid(self, app, user, schema, url_registrations, manual_guid_payload, manual_guid, manual_doi, enable_flag):
    """With the flag fixture active, creation returns 201 and applies the manual GUID and DOI."""
    create_res = app.post_json_api(url_registrations, manual_guid_payload, auth=user.auth)
    created = create_res.json['data']
    assert create_res.status_code == 201
    assert created['id'] == manual_guid, 'manual guid was not assigned'

    # Follow the identifiers relationship link from the create response.
    identifiers_url = created['relationships']['identifiers']['links']['related']['href']
    identifiers_res = app.get(identifiers_url, auth=user.auth)
    assert identifiers_res.status_code == 200

    # Only the first identifier in the list is compared against the manual DOI.
    first_identifier = identifiers_res.json['data'][0]
    assert first_identifier['attributes']['value'] == manual_doi

@mock.patch('framework.celery_tasks.handlers.enqueue_task')
def test_need_admin_perms_on_draft(
Expand Down
3 changes: 2 additions & 1 deletion osf/external/gravy_valet/request_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
import requests
from requests.exceptions import RequestException

from framework import sentry
from website import settings
from . import auth_helpers

Expand Down Expand Up @@ -275,7 +276,7 @@ def _make_gv_request(
return None
if not response.ok:
# log error to Sentry
logger.error(f"GV request failed with status code {response.status_code}: {response.content}")
sentry.log_message(f"GV request failed with status code {response.status_code}")
pass
return response

Expand Down
6 changes: 6 additions & 0 deletions osf/features.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,12 @@
# 5. When a flag name is no longer referenced anywhere in this repo or in the Ember app remove it from this list.
flags:

- flag_name: MANUAL_DOI_AND_GUID
name: manual_doi_and_guid
note: This is used to allow certain product staff members to manually assign doi and guid during Registration or
Preprint creation. DO NOT CHANGE UNLESS ABSOLUTELY NECESSARY.
everyone: false

- flag_name: ENABLE_GV
name: gravy_waffle
note: This is used to enable GravyValet, the system responsible for addons, this will remove the files widget on the
Expand Down
19 changes: 19 additions & 0 deletions osf/migrations/0030_abstractnode__manual_guid.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
# Generated by Django 4.2.15 on 2025-06-04 13:26

from django.db import migrations
import osf.utils.fields


class Migration(migrations.Migration):
# Schema migration that adds a `_manual_guid` field to the `abstractnode` model.

# Depends on migration 0029 of the `osf` app.
dependencies = [
('osf', '0029_remove_abstractnode_keenio_read_key'),
]

operations = [
# New optional field: blank and NULL are allowed, the default is None, max length is 255.
# NOTE(review): LowercaseCharField presumably lowercases stored values — confirm in osf.utils.fields.
migrations.AddField(
model_name='abstractnode',
name='_manual_guid',
field=osf.utils.fields.LowercaseCharField(blank=True, default=None, max_length=255, null=True),
),
]
48 changes: 38 additions & 10 deletions osf/models/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,18 @@
def _check_blacklist(guid):
    """Return True when a ``BlackListGuid`` row exists whose ``guid`` equals the given value."""
    blacklisted_rows = BlackListGuid.objects.filter(guid=guid)
    return blacklisted_rows.exists()

def check_manually_assigned_guid(guid_id, length=5):
    """Decide whether ``guid_id`` is acceptable as a manually assigned GUID.

    The value is accepted only if it is a non-empty ``str`` of exactly
    ``length`` characters, ``_check_blacklist`` does not flag it, and no
    ``Guid`` row already has it as ``_id``. Every rejection is logged at
    error level before ``False`` is returned.

    Returns ``True`` when all checks pass, ``False`` otherwise.
    """
    is_well_formed = bool(guid_id) and isinstance(guid_id, str) and len(guid_id) == length
    if not is_well_formed:
        logger.error(f'Invalid GUID: guid_id={guid_id}')
        return False

    # The shape check above runs first, so the lookups below only see plausible values.
    if _check_blacklist(guid_id):
        logger.error(f'Blacklisted GUID: guid_id={guid_id}')
        return False

    already_taken = Guid.objects.filter(_id=guid_id).exists()
    if already_taken:
        logger.error(f'Duplicate GUID: guid_id={guid_id}')
        return False

    return True


def generate_guid(length=5):
while True:
Expand Down Expand Up @@ -602,6 +614,13 @@ def get_semantic_iri(self):
raise ValueError(f'no osfid for {self} (cannot build semantic iri)')
return osfid_iri(_osfid)


def _clear_cached_guid(instance):
    """Drop the ``'guids'`` entry from ``instance._prefetched_objects_cache``, if present.

    Does nothing when the instance has no ``_prefetched_objects_cache``
    attribute, when that attribute is ``None``, or when the cache has no
    ``'guids'`` key. Other cache entries are left untouched.
    """
    prefetch_cache = getattr(instance, '_prefetched_objects_cache', None)
    if prefetch_cache is not None:
        # pop with a default removes the entry in one step and is a no-op when the key is absent.
        prefetch_cache.pop('guids', None)


@receiver(post_save)
def ensure_guid(sender, instance, **kwargs):
"""Generate guid if it doesn't exist for subclasses of GuidMixin except for subclasses of VersionedGuidMixin
Expand All @@ -615,17 +634,26 @@ def ensure_guid(sender, instance, **kwargs):
# Only the initial or the latest version is referred to by the base guid in the Guid table. All versions have
# their "versioned" guid in the GuidVersionsThrough table.
return False

from osf.models import Registration
if issubclass(sender, Registration) and instance._manual_guid:
# Note: Only skip default GUID generation if the registration has `_manual_guid` set
# Note: Must clear guid cached because registration is cloned and cast from a draft registration
_clear_cached_guid(instance)
return False

existing_guids = Guid.objects.filter(
object_id=instance.pk,
content_type=ContentType.objects.get_for_model(instance)
)
has_cached_guids = hasattr(instance, '_prefetched_objects_cache') and 'guids' in instance._prefetched_objects_cache
if not existing_guids.exists():
# Clear query cache of instance.guids
if has_cached_guids:
del instance._prefetched_objects_cache['guids']
Guid.objects.create(
object_id=instance.pk,
content_type=ContentType.objects.get_for_model(instance),
_id=generate_guid(instance.__guid_min_length__)
)
if existing_guids.exists():
return False

# Note: must clear cached guid because the instance could be cloned and cast from existing instance.
_clear_cached_guid(instance)
Guid.objects.create(
object_id=instance.pk,
content_type=ContentType.objects.get_for_model(instance),
_id=generate_guid(instance.__guid_min_length__)
)
return True
4 changes: 2 additions & 2 deletions osf/models/draft_node.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ def convert_draft_node_to_node(self, auth):
)
return

def register_node(self, schema, auth, draft_registration, parent=None, child_ids=None, provider=None):
def register_node(self, schema, auth, draft_registration, parent=None, child_ids=None, provider=None, manual_guid=None):
"""Converts the DraftNode to a Node, copies editable fields from the DraftRegistration back to the Node,
and then registers the Node

Expand All @@ -73,4 +73,4 @@ def register_node(self, schema, auth, draft_registration, parent=None, child_ids
self.copy_editable_fields(draft_registration, save=True)

# Calls super on Node, since self is no longer a DraftNode
return super(Node, self).register_node(schema, auth, draft_registration, parent, child_ids, provider)
return super(Node, self).register_node(schema, auth, draft_registration, parent=parent, child_ids=child_ids, provider=provider, manual_guid=manual_guid)
Loading