From af108e4690a02b92aa312e3923f858d4f95c3e58 Mon Sep 17 00:00:00 2001
From: ihorsokhanexoft
Date: Mon, 21 Apr 2025 21:16:17 +0300
Subject: [PATCH 01/23] gdpr deletion shouldn't take into account deleted nodes (#11098)

## Purpose
When a user is the sole admin of a deleted node, an OSF admin can't GDPR-delete that user because this validation raises an error.

## Changes
Skip the admin-status check for deleted nodes. Added a test.

## Ticket
https://openscience.atlassian.net/browse/ENG-7557
---
 admin_tests/users/test_views.py | 10 ++++++++++
 osf/models/user.py              |  2 +-
 2 files changed, 11 insertions(+), 1 deletion(-)

diff --git a/admin_tests/users/test_views.py b/admin_tests/users/test_views.py
index cd51459e134..1b878f00509 100644
--- a/admin_tests/users/test_views.py
+++ b/admin_tests/users/test_views.py
@@ -141,6 +141,16 @@ def test_correct_view_permissions(self):
         response = self.view.as_view()(request, guid=user._id)
         self.assertEqual(response.status_code, 302)
 
+    def test_user_with_deleted_node_is_deleted(self):
+        patch_messages(self.request)
+
+        project = ProjectFactory(creator=self.user, is_deleted=True)
+        assert self.user.nodes.filter(id=project.id, is_deleted=True).count()
+
+        self.view().post(self.request)
+        self.user.reload()
+        assert self.user.deleted
+
 
 class TestDisableUser(AdminTestCase):
     def setUp(self):
diff --git a/osf/models/user.py b/osf/models/user.py
index 42bf8d12929..8d3a844c711 100644
--- a/osf/models/user.py
+++ b/osf/models/user.py
@@ -1986,7 +1986,7 @@ def _validate_admin_status_for_gdpr_delete(self, resource):
             is_active=True
         ).exclude(id=self.id).exists()
 
-        if not alternate_admins:
+        if not resource.deleted and not alternate_admins:
             raise UserStateError(
                 f'You cannot delete {resource.__class__.__name__} {resource._id} because it would be '
                 f'a {resource.__class__.__name__} with contributors, but with no admin.'

From bb1892c364993cf446676fce6be87923dee166bf Mon Sep 17 00:00:00 2001
From: Ostap Zherebetskyi
Date: Mon, 14 Apr 2025 15:29:05 +0300
Subject: [PATCH 02/23] Remove outdated tests

---
 framework/forms/utils.py    |  28 --------
 tests/test_events.py        |  31 --------
 tests/test_metadata.py      |  13 ----
 tests/test_misc_views.py    |  47 ++++++------
 tests/test_node_licenses.py | 138 ------------------------------------
 tests/test_preprints.py     |  13 ----
 6 files changed, 23 insertions(+), 247 deletions(-)
 delete mode 100644 tests/test_node_licenses.py

diff --git a/framework/forms/utils.py b/framework/forms/utils.py
index 420d70bcaf0..973ed310481 100644
--- a/framework/forms/utils.py
+++ b/framework/forms/utils.py
@@ -9,34 +9,6 @@ def sanitize(s, **kwargs):
     return sanitize_html(s, **kwargs)
 
 
-def process_data(data, func):
-    if isinstance(data, dict):
-        return {
-            key: process_data(value, func)
-            for key, value in data.items()
-        }
-    elif isinstance(data, list):
-        return [
-            process_data(item, func)
-            for item in data
-        ]
-    return func(data)
-
-
-def process_payload(data):
-    return process_data(
-        data,
-        lambda value: quote(value.encode('utf-8') if value else '', safe=' ')
-    )
-
-
-def unprocess_payload(data):
-    return process_data(
-        data,
-        lambda value: unquote(value.encode('utf-8') if value else '')
-    )
-
-
 def jsonify(form):
     """Cast WTForm to JSON object. 
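# --- Editorial aside (illustrative only; not part of this patch) -------------
# The deleted process_payload/unprocess_payload helpers walked nested
# dicts/lists and percent-encoded each leaf string. Equivalent behavior on a
# single leaf value, matching the expectations of the tests removed from
# tests/test_metadata.py below:
#
#     from urllib.parse import quote, unquote
#     quote('bar&baz'.encode('utf-8'), safe=' ')  # -> 'bar%26baz'
#     unquote('bar%26baz')                        # -> 'bar&baz'
# ------------------------------------------------------------------------------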
diff --git a/tests/test_events.py b/tests/test_events.py index 55b51fb3e8e..866bf6ec337 100644 --- a/tests/test_events.py +++ b/tests/test_events.py @@ -18,37 +18,6 @@ email_digest = 'email_digest' -class TestEventNotImplemented(OsfTestCase): - """ - Test non-implemented errors - """ - @register('not_implemented') - class NotImplementedEvent(Event): - pass - - def setUp(self): - super().setUp() - self.user = factories.UserFactory() - self.auth = Auth(user=self.user) - self.node = factories.ProjectFactory(creator=self.user) - self.event = self.NotImplementedEvent(self.user, self.node, 'not_implemented') - - def test_text(self): - with raises(NotImplementedError): - text = self.event.text_message - - def test_html(self): - with raises(NotImplementedError): - html = self.event.html_message - - def test_url(self): - with raises(NotImplementedError): - url = self.event.url - - def test_event(self): - with raises(NotImplementedError): - event = self.event.event_type - class TestListOfFiles(OsfTestCase): """ diff --git a/tests/test_metadata.py b/tests/test_metadata.py index c29365f4151..5f81c35fc5c 100644 --- a/tests/test_metadata.py +++ b/tests/test_metadata.py @@ -3,7 +3,6 @@ import pytest from django.core.exceptions import ValidationError -from framework.forms.utils import process_payload from osf.models import RegistrationSchema from osf.utils.migrations import ensure_schemas from website.project.metadata.schemas import OSF_META_SCHEMA_FILES @@ -31,18 +30,6 @@ def test_registrationschema_is_fine_with_same_name_but_different_version(self): RegistrationSchema(name='foo', schema={'foo': 42}, schema_version=2).save() assert RegistrationSchema.objects.filter(name='foo').count() == 2 - def test_process(self): - processed = process_payload({'foo': 'bar&baz'}) - assert processed['foo'] == 'bar%26baz' - - def test_process_list(self): - processed = process_payload({'foo': ['bar', 'baz&bob']}) - assert processed['foo'][1] == 'baz%26bob' - - def test_process_whitespace(self): - processed = process_payload({'foo': 'bar baz'}) - assert processed['foo'] == 'bar baz' - if __name__ == '__main__': unittest.main() diff --git a/tests/test_misc_views.py b/tests/test_misc_views.py index 543fb7d6068..35bccc88119 100644 --- a/tests/test_misc_views.py +++ b/tests/test_misc_views.py @@ -769,29 +769,29 @@ def test_view_comments_updates_user_comments_view_timestamp_files(self): # Regression test for https://openscience.atlassian.net/browse/OSF-5193 # moved from tests/test_comments.py - def test_find_unread_includes_edited_comments(self): - project = ProjectFactory() - user = AuthUserFactory() - project.add_contributor(user, save=True) - comment = CommentFactory(node=project, user=project.creator) - n_unread = Comment.find_n_unread(user=user, node=project, page='node') - assert n_unread == 1 - - url = project.api_url_for('update_comments_timestamp') - payload = {'page': 'node', 'rootId': project._id} - self.app.put(url, json=payload, auth=user.auth) - user.reload() - n_unread = Comment.find_n_unread(user=user, node=project, page='node') - assert n_unread == 0 - - # Edit previously read comment - comment.edit( - auth=Auth(project.creator), - content='edited', - save=True - ) - n_unread = Comment.find_n_unread(user=user, node=project, page='node') - assert n_unread == 1 + def test_find_unread_includes_edited_comments(self): + project = ProjectFactory() + user = AuthUserFactory() + project.add_contributor(user, save=True) + comment = CommentFactory(node=project, user=project.creator) + n_unread = 
Comment.find_n_unread(user=user, node=project, page='node') + assert n_unread == 1 + + url = project.api_url_for('update_comments_timestamp') + payload = {'page': 'node', 'rootId': project._id} + self.app.put(url, json=payload, auth=user.auth) + user.reload() + n_unread = Comment.find_n_unread(user=user, node=project, page='node') + assert n_unread == 0 + + # Edit previously read comment + comment.edit( + auth=Auth(project.creator), + content='edited', + save=True + ) + n_unread = Comment.find_n_unread(user=user, node=project, page='node') + assert n_unread == 1 @mock.patch('website.views.PROXY_EMBER_APPS', False) class TestResolveGuid(OsfTestCase): @@ -834,4 +834,3 @@ def test_preprint_provider_with_osf_domain(self, mock_use_ember_app): url = web_url_for('resolve_guid', _guid=True, guid=preprint._id) res = self.app.get(url) mock_use_ember_app.assert_called_with() - diff --git a/tests/test_node_licenses.py b/tests/test_node_licenses.py deleted file mode 100644 index d16cdb500d9..00000000000 --- a/tests/test_node_licenses.py +++ /dev/null @@ -1,138 +0,0 @@ -import builtins -import json -from unittest import mock - -import pytest -from django.core.exceptions import ValidationError - -from framework.auth import Auth -from osf_tests.factories import (AuthUserFactory, NodeLicenseRecordFactory, - ProjectFactory) -from tests.base import OsfTestCase -from osf.utils.migrations import ensure_licenses -from tests.utils import assert_logs, assert_not_logs -from website import settings -from osf.models.licenses import NodeLicense, serialize_node_license_record, serialize_node_license -from osf.models import NodeLog -from osf.exceptions import NodeStateError - - - -CHANGED_NAME = 'FOO BAR' -CHANGED_TEXT = 'Some good new text' -CHANGED_PROPERTIES = ['foo', 'bar'] -LICENSE_TEXT = json.dumps({ - 'MIT': { - 'name': CHANGED_NAME, - 'text': CHANGED_TEXT, - 'properties': CHANGED_PROPERTIES - } -}) - -class TestNodeLicenses(OsfTestCase): - - def setUp(self): - super().setUp() - - self.user = AuthUserFactory() - self.node = ProjectFactory(creator=self.user) - self.LICENSE_NAME = 'MIT License' - self.node_license = NodeLicense.objects.get(name=self.LICENSE_NAME) - self.YEAR = '2105' - self.COPYRIGHT_HOLDERS = ['Foo', 'Bar'] - self.node.node_license = NodeLicenseRecordFactory( - node_license=self.node_license, - year=self.YEAR, - copyright_holders=self.COPYRIGHT_HOLDERS - ) - self.node.save() - - def test_serialize_node_license(self): - serialized = serialize_node_license(self.node_license) - assert serialized['name'] == self.LICENSE_NAME - assert serialized['id'] == self.node_license.license_id - assert serialized['text'] == self.node_license.text - - def test_serialize_node_license_record(self): - serialized = serialize_node_license_record(self.node.node_license) - assert serialized['name'] == self.LICENSE_NAME - assert serialized['id'] == self.node_license.license_id - assert serialized['text'] == self.node_license.text - assert serialized['year'] == self.YEAR - assert serialized['copyright_holders'] == self.COPYRIGHT_HOLDERS - - def test_serialize_node_license_record_None(self): - self.node.node_license = None - serialized = serialize_node_license_record(self.node.node_license) - assert serialized == {} - - def test_copy_node_license_record(self): - record = self.node.node_license - copied = record.copy() - assert copied._id is not None - assert record._id != copied._id - for prop in ('license_id', 'name', 'node_license'): - assert getattr(record, prop) == getattr(copied, prop) - - 
@pytest.mark.enable_implicit_clean - def test_license_uniqueness_on_id_is_enforced_in_the_database(self): - NodeLicense(license_id='foo', name='bar', text='baz').save() - pytest.raises(ValidationError, NodeLicense(license_id='foo', name='buz', text='boo').save) - - def test_ensure_licenses_updates_existing_licenses(self): - assert ensure_licenses() == (0, 18) - - def test_ensure_licenses_no_licenses(self): - before_count = NodeLicense.objects.all().count() - NodeLicense.objects.all().delete() - assert not NodeLicense.objects.all().count() - - ensure_licenses() - assert before_count == NodeLicense.objects.all().count() - - def test_ensure_licenses_some_missing(self): - NodeLicense.objects.get(license_id='LGPL3').delete() - with pytest.raises(NodeLicense.DoesNotExist): - NodeLicense.objects.get(license_id='LGPL3') - ensure_licenses() - found = NodeLicense.objects.get(license_id='LGPL3') - assert found is not None - - def test_ensure_licenses_updates_existing(self): - with mock.patch.object(builtins, 'open', mock.mock_open(read_data=LICENSE_TEXT)): - ensure_licenses() - MIT = NodeLicense.objects.get(license_id='MIT') - assert MIT.name == CHANGED_NAME - assert MIT.text == CHANGED_TEXT - assert MIT.properties == CHANGED_PROPERTIES - - @assert_logs(NodeLog.CHANGED_LICENSE, 'node') - def test_Node_set_node_license(self): - GPL3 = NodeLicense.objects.get(license_id='GPL3') - NEW_YEAR = '2014' - COPYLEFT_HOLDERS = ['Richard Stallman'] - self.node.set_node_license( - { - 'id': GPL3.license_id, - 'year': NEW_YEAR, - 'copyrightHolders': COPYLEFT_HOLDERS - }, - auth=Auth(self.user), - save=True - ) - - assert self.node.node_license.license_id == GPL3.license_id - assert self.node.node_license.name == GPL3.name - assert self.node.node_license.copyright_holders == COPYLEFT_HOLDERS - - @assert_not_logs(NodeLog.CHANGED_LICENSE, 'node') - def test_Node_set_node_license_invalid(self): - with pytest.raises(NodeStateError): - self.node.set_node_license( - { - 'id': 'SOME ID', - 'year': 'foo', - 'copyrightHolders': [] - }, - auth=Auth(self.user) - ) diff --git a/tests/test_preprints.py b/tests/test_preprints.py index a213c961659..5528ef28219 100644 --- a/tests/test_preprints.py +++ b/tests/test_preprints.py @@ -114,16 +114,6 @@ def test_verified_publishable(self, preprint): preprint.deleted = None assert preprint.verified_publishable is True - def test_is_deleted(self, preprint): - assert preprint.deleted is None - assert preprint.is_deleted is False - - preprint.deleted = timezone.now() - preprint.save() - - assert preprint.deleted is not None - assert preprint.is_deleted is True - def test_has_submitted_preprint(self, preprint): preprint.machine_state = 'initial' preprint.save() @@ -168,9 +158,6 @@ def test_all_tags(self, preprint, auth): assert len(preprint.all_tags) == 1 assert preprint.all_tags[0].name == 'test_tag_1' - def test_system_tags(self, preprint): - assert preprint.system_tags.exists() is False - class TestPreprintSubjects: From 31fe97a58ac93f18a8bae5374fa61fc2905792d6 Mon Sep 17 00:00:00 2001 From: antkryt Date: Tue, 22 Apr 2025 23:23:10 +0300 Subject: [PATCH 03/23] [ENG-7270] Enable Product Team to Force Archive Registrations in the Admin App (#11105) ## Purpose Implement Force Archive for admin UI based on `osf/management/commands/force_archive.py` ## Changes BE: 1. remove globals from force archive management command to make it thread-safe (usable in views) 2. Implement CheckArchiveStatusRegistrationsView and ForceArchiveRegistrationsView 3. 
RestartStuckRegistrationsView is replaced with ForceArchiveRegistrationsView
FE:
1. Added Check archive status button
2. Added Force Archive button
3. Updated Restart Stuck Registration button (it now opens the same modal as Force Archive for choosing parameters, but dry-run mode is not available)

## QA Notes
- Check archive status is displayed if the node is a registration
- Force Archive button is displayed if the registration is not archived and not stuck
- Restart/Remove Stuck Registration are displayed if the registration is not archived and stuck

## Ticket
https://openscience.atlassian.net/browse/ENG-7270
---
 admin/nodes/urls.py                                |   6 +-
 admin/nodes/views.py                               |  98 ++++++++---
 admin/templates/nodes/node.html                    |   2 +-
 .../nodes/registration_force_archive.html          |  79 +++++++++
 .../registration_force_archive_form.html           |  39 +++++
 .../nodes/restart_stuck_registration.html          |  51 ------
 admin_tests/nodes/test_views.py                    |  59 ++++++-
 osf/exceptions.py                                  |  15 ++
 osf/management/commands/force_archive.py           | 154 ++++++++++--------
 osf/models/registrations.py                        |   5 +
 10 files changed, 357 insertions(+), 151 deletions(-)
 create mode 100644 admin/templates/nodes/registration_force_archive.html
 create mode 100644 admin/templates/nodes/registration_force_archive_form.html
 delete mode 100644 admin/templates/nodes/restart_stuck_registration.html

diff --git a/admin/nodes/urls.py b/admin/nodes/urls.py
index d28a73e2c51..c2704ee95b2 100644
--- a/admin/nodes/urls.py
+++ b/admin/nodes/urls.py
@@ -27,10 +27,12 @@
     re_path(r'^(?P<guid>[a-z0-9]+)/reindex_share_node/$', views.NodeReindexShare.as_view(),
             name='reindex-share-node'),
     re_path(r'^(?P<guid>[a-z0-9]+)/reindex_elastic_node/$', views.NodeReindexElastic.as_view(),
             name='reindex-elastic-node'),
-    re_path(r'^(?P<guid>[a-z0-9]+)/restart_stuck_registrations/$', views.RestartStuckRegistrationsView.as_view(),
-            name='restart-stuck-registrations'),
     re_path(r'^(?P<guid>[a-z0-9]+)/remove_stuck_registrations/$', views.RemoveStuckRegistrationsView.as_view(),
             name='remove-stuck-registrations'),
+    re_path(r'^(?P<guid>[a-z0-9]+)/check_archive_status/$', views.CheckArchiveStatusRegistrationsView.as_view(),
+            name='check-archive-status'),
+    re_path(r'^(?P<guid>[a-z0-9]+)/force_archive_registration/$', views.ForceArchiveRegistrationsView.as_view(),
+            name='force-archive-registration'),
     re_path(r'^(?P<guid>[a-z0-9]+)/remove_user/(?P<user_id>[a-z0-9]+)/$', views.NodeRemoveContributorView.as_view(),
             name='remove-user'),
     re_path(r'^(?P<guid>[a-z0-9]+)/modify_storage_usage/$', views.NodeModifyStorageUsage.as_view(),
diff --git a/admin/nodes/views.py b/admin/nodes/views.py
index fc16a3b0d05..2d4f0c1194f 100644
--- a/admin/nodes/views.py
+++ b/admin/nodes/views.py
@@ -1,4 +1,5 @@
 import pytz
+from enum import Enum
 from datetime import datetime
 
 from framework import status
@@ -26,7 +27,7 @@
 from api.share.utils import update_share
 from api.caching.tasks import update_storage_usage_cache
 
-from osf.exceptions import NodeStateError
+from osf.exceptions import NodeStateError, RegistrationStuckError
 from osf.models import (
     OSFUser,
     NodeLog,
@@ -672,23 +673,16 @@ def post(self, request, *args, **kwargs):
         return redirect(self.get_success_url())
 
 
-class RestartStuckRegistrationsView(NodeMixin, TemplateView):
-    """ Allows an authorized user to restart a registrations archive process.
+class RemoveStuckRegistrationsView(NodeMixin, View):
+    """ Allows an authorized user to remove a registration if it's stuck in the archiving process. 
""" - template_name = 'nodes/restart_registrations_modal.html' permission_required = ('osf.view_node', 'osf.change_node') def post(self, request, *args, **kwargs): - # Prevents circular imports that cause admin app to hang at startup - from osf.management.commands.force_archive import archive, verify stuck_reg = self.get_object() - if verify(stuck_reg): - try: - archive(stuck_reg) - messages.success(request, 'Registration archive processes has restarted') - except Exception as exc: - messages.error(request, f'This registration cannot be unstuck due to {exc.__class__.__name__} ' - f'if the problem persists get a developer to fix it.') + if Registration.find_failed_registrations().filter(id=stuck_reg.id).exists(): + stuck_reg.delete_registration_tree(save=True) + messages.success(request, 'The registration has been deleted') else: messages.error(request, 'This registration may not technically be stuck,' ' if the problem persists get a developer to fix it.') @@ -696,20 +690,80 @@ def post(self, request, *args, **kwargs): return redirect(self.get_success_url()) -class RemoveStuckRegistrationsView(NodeMixin, TemplateView): - """ Allows an authorized user to remove a registrations if it's stuck in the archiving process. +class CheckArchiveStatusRegistrationsView(NodeMixin, View): + """Allows an authorized user to check a registration archive status. + """ + permission_required = ('osf.view_node', 'osf.change_node') + + def get(self, request, *args, **kwargs): + # Prevents circular imports that cause admin app to hang at startup + from osf.management.commands.force_archive import check + + registration = self.get_object() + + if registration.archived: + messages.success(request, f"Registration {registration._id} is archived.") + return redirect(self.get_success_url()) + + try: + archive_status = check(registration) + messages.success(request, archive_status) + except RegistrationStuckError as exc: + messages.error(request, str(exc)) + + return redirect(self.get_success_url()) + + +class CollisionMode(Enum): + NONE: str = 'none' + SKIP: str = 'skip' + DELETE: str = 'delete' + + +class ForceArchiveRegistrationsView(NodeMixin, View): + """Allows an authorized user to force archive registration. 
""" - template_name = 'nodes/remove_registrations_modal.html' permission_required = ('osf.view_node', 'osf.change_node') def post(self, request, *args, **kwargs): - stuck_reg = self.get_object() - if Registration.find_failed_registrations().filter(id=stuck_reg.id).exists(): - stuck_reg.delete_registration_tree(save=True) - messages.success(request, 'The registration has been deleted') + # Prevents circular imports that cause admin app to hang at startup + from osf.management.commands.force_archive import verify, archive, DEFAULT_PERMISSIBLE_ADDONS + + registration = self.get_object() + force_archive_params = request.POST + + collision_mode = force_archive_params.get('collision_mode', CollisionMode.NONE.value) + delete_collision = CollisionMode.DELETE.value == collision_mode + skip_collision = CollisionMode.SKIP.value == collision_mode + + allow_unconfigured = force_archive_params.get('allow_unconfigured', False) + + addons = set(force_archive_params.getlist('addons', [])) + addons.update(DEFAULT_PERMISSIBLE_ADDONS) + + try: + verify(registration, permissible_addons=addons, raise_error=True) + except ValidationError as exc: + messages.error(request, str(exc)) + return redirect(self.get_success_url()) + + dry_mode = force_archive_params.get('dry_mode', False) + + if dry_mode: + messages.success(request, f"Registration {registration._id} can be archived.") else: - messages.error(request, 'This registration may not technically be stuck,' - ' if the problem persists get a developer to fix it.') + try: + archive( + registration, + permissible_addons=addons, + allow_unconfigured=allow_unconfigured, + skip_collision=skip_collision, + delete_collision=delete_collision, + ) + messages.success(request, 'Registration archive process has finished.') + except Exception as exc: + messages.error(request, f'This registration cannot be archived due to {exc.__class__.__name__}: {str(exc)}. ' + f'If the problem persists get a developer to fix it.') return redirect(self.get_success_url()) diff --git a/admin/templates/nodes/node.html b/admin/templates/nodes/node.html index aa705243a69..c178709534f 100644 --- a/admin/templates/nodes/node.html +++ b/admin/templates/nodes/node.html @@ -17,7 +17,7 @@ View Logs {% include "nodes/remove_node.html" with node=node %} - {% include "nodes/restart_stuck_registration.html" with node=node %} + {% include "nodes/registration_force_archive.html" with node=node %} {% include "nodes/make_private.html" with node=node %} {% include "nodes/make_public.html" with node=node %} {% include "nodes/mark_spam.html" with node=node %} diff --git a/admin/templates/nodes/registration_force_archive.html b/admin/templates/nodes/registration_force_archive.html new file mode 100644 index 00000000000..7c87f1a837d --- /dev/null +++ b/admin/templates/nodes/registration_force_archive.html @@ -0,0 +1,79 @@ +{% if node.is_registration %} + + Check archive status + +{% if not node.archived %} + {% if node.is_stuck_registration %} + + Restart Stuck Registration + + + Remove Stuck Registration + + {% else %} + + Force Archive + + {% endif %} + + + + + + + +{% endif %} +{% endif %} diff --git a/admin/templates/nodes/registration_force_archive_form.html b/admin/templates/nodes/registration_force_archive_form.html new file mode 100644 index 00000000000..ab52d7f7c33 --- /dev/null +++ b/admin/templates/nodes/registration_force_archive_form.html @@ -0,0 +1,39 @@ +
+ {% csrf_token %} + +
\ No newline at end of file diff --git a/admin/templates/nodes/restart_stuck_registration.html b/admin/templates/nodes/restart_stuck_registration.html deleted file mode 100644 index c81bd3fb55f..00000000000 --- a/admin/templates/nodes/restart_stuck_registration.html +++ /dev/null @@ -1,51 +0,0 @@ -{% if node.is_stuck_registration %} - - Restart Registration - - - Remove Registration - - - -{% endif %} - diff --git a/admin_tests/nodes/test_views.py b/admin_tests/nodes/test_views.py index c80eeb27e47..9f978e75268 100644 --- a/admin_tests/nodes/test_views.py +++ b/admin_tests/nodes/test_views.py @@ -17,8 +17,9 @@ NodeKnownHamList, NodeConfirmHamView, AdminNodeLogView, - RestartStuckRegistrationsView, RemoveStuckRegistrationsView, + CheckArchiveStatusRegistrationsView, + ForceArchiveRegistrationsView, ApprovalBacklogListView, ConfirmApproveBacklogView ) @@ -375,28 +376,50 @@ def test_get_queryset(self): assert log_entry.params['title_new'] == 'New Title' -class TestRestartStuckRegistrationsView(AdminTestCase): +class TestCheckArchiveStatusRegistrationsView(AdminTestCase): + def setUp(self): + super().setUp() + self.user = AuthUserFactory() + self.view = CheckArchiveStatusRegistrationsView + self.request = RequestFactory().post('/fake_path') + + def test_check_archive_status(self): + from django.contrib.messages.storage.fallback import FallbackStorage + + registration = RegistrationFactory(creator=self.user, archive=True) + view = setup_log_view(self.view(), self.request, guid=registration._id) + + # django.contrib.messages has a bug which effects unittests + # more info here -> https://code.djangoproject.com/ticket/17971 + setattr(self.request, 'session', 'session') + messages = FallbackStorage(self.request) + setattr(self.request, '_messages', messages) + + view.get(self.request) + + assert not registration.archived + assert f'Registration {registration._id} is not stuck in archiving' in [m.message for m in messages] + + +class TestForceArchiveRegistrationsView(AdminTestCase): def setUp(self): super().setUp() self.user = AuthUserFactory() self.registration = RegistrationFactory(creator=self.user, archive=True) self.registration.save() - self.view = RestartStuckRegistrationsView + self.view = ForceArchiveRegistrationsView self.request = RequestFactory().post('/fake_path') def test_get_object(self): - view = RestartStuckRegistrationsView() - view = setup_log_view(view, self.request, guid=self.registration._id) + view = setup_log_view(self.view(), self.request, guid=self.registration._id) assert self.registration == view.get_object() - def test_restart_stuck_registration(self): + def test_force_archive_registration(self): # Prevents circular import that prevents admin app from starting up from django.contrib.messages.storage.fallback import FallbackStorage - view = RestartStuckRegistrationsView() - view = setup_log_view(view, self.request, guid=self.registration._id) - assert self.registration.archive_job.status == 'INITIATED' + view = setup_log_view(self.view(), self.request, guid=self.registration._id) # django.contrib.messages has a bug which effects unittests # more info here -> https://code.djangoproject.com/ticket/17971 @@ -408,6 +431,24 @@ def test_restart_stuck_registration(self): assert self.registration.archive_job.status == 'SUCCESS' + def test_force_archive_registration_dry_mode(self): + # Prevents circular import that prevents admin app from starting up + from django.contrib.messages.storage.fallback import FallbackStorage + + request = RequestFactory().post('/fake_path', 
data={'dry_mode': 'true'}) + view = setup_log_view(self.view(), request, guid=self.registration._id) + assert self.registration.archive_job.status == 'INITIATED' + + # django.contrib.messages has a bug which effects unittests + # more info here -> https://code.djangoproject.com/ticket/17971 + setattr(request, 'session', 'session') + messages = FallbackStorage(request) + setattr(request, '_messages', messages) + + view.post(request) + + assert self.registration.archive_job.status == 'INITIATED' + class TestRemoveStuckRegistrationsView(AdminTestCase): def setUp(self): diff --git a/osf/exceptions.py b/osf/exceptions.py index 82e8ab5f505..30130a587d1 100644 --- a/osf/exceptions.py +++ b/osf/exceptions.py @@ -292,3 +292,18 @@ class MetadataSerializationError(OSFError): class InvalidCookieOrSessionError(OSFError): """Raised when cookie is invalid or session key is not found.""" pass + + +class RegistrationStuckError(OSFError): + """Raised if Registration stuck during archive.""" + pass + + +class RegistrationStuckRecoverableException(RegistrationStuckError): + """Raised if registration stuck but recoverable.""" + pass + + +class RegistrationStuckBrokenException(RegistrationStuckError): + """Raised if registration stuck and not recoverable.""" + pass diff --git a/osf/management/commands/force_archive.py b/osf/management/commands/force_archive.py index d58b3641deb..3a40ea4d5f8 100644 --- a/osf/management/commands/force_archive.py +++ b/osf/management/commands/force_archive.py @@ -22,10 +22,12 @@ import json import logging import requests +import contextlib import django django.setup() from django.contrib.contenttypes.models import ContentType +from django.core.exceptions import ValidationError from django.core.management.base import BaseCommand from django.db.models import Q from django.db.utils import IntegrityError @@ -35,6 +37,7 @@ from framework import sentry from framework.exceptions import HTTPError from osf.models import Node, NodeLog, Registration, BaseFileNode +from osf.exceptions import RegistrationStuckRecoverableException, RegistrationStuckBrokenException from api.base.utils import waterbutler_api_url_for from scripts import utils as script_utils from website.archiver import ARCHIVER_SUCCESS @@ -43,11 +46,6 @@ logger = logging.getLogger(__name__) -# Control globals -DELETE_COLLISIONS = False -SKIP_COLLISIONS = False -ALLOW_UNCONFIGURED = False - # Logging globals CHECKED_OKAY = [] CHECKED_STUCK_RECOVERABLE = [] @@ -57,7 +55,7 @@ SKIPPED = [] # Ignorable NodeLogs -LOG_WHITELIST = { +LOG_WHITELIST = ( 'affiliated_institution_added', 'category_updated', 'comment_added', @@ -109,35 +107,34 @@ 'node_access_requests_disabled', 'view_only_link_added', 'view_only_link_removed', -} +) # Require action, but recoverable from -LOG_GREYLIST = { +LOG_GREYLIST = ( 'addon_file_moved', 'addon_file_renamed', 'osf_storage_file_added', 'osf_storage_file_removed', 'osf_storage_file_updated', 'osf_storage_folder_created' -} -VERIFY_PROVIDER = { +) +VERIFY_PROVIDER = ( 'addon_file_moved', 'addon_file_renamed' -} +) # Permissible in certain circumstances after communication with user -PERMISSIBLE_BLACKLIST = { +PERMISSIBLE_BLACKLIST = ( 'dropbox_folder_selected', 'dropbox_node_authorized', 'dropbox_node_deauthorized', 'addon_removed', 'addon_added' -} +) -# Extendable with command line input -PERMISSIBLE_ADDONS = { - 'osfstorage' -} +DEFAULT_PERMISSIBLE_ADDONS = ( + 'osfstorage', +) def complete_archive_target(reg, addon_short_name): # Cache registration files count @@ -149,16 +146,16 @@ def 
complete_archive_target(reg, addon_short_name): target.save() archive_job._post_update_target() -def perform_wb_copy(reg, node_settings): +def perform_wb_copy(reg, node_settings, delete_collisions=False, skip_collisions=False): src, dst, user = reg.archive_job.info() if dst.files.filter(name=node_settings.archive_folder_name.replace('/', '-')).exists(): - if not DELETE_COLLISIONS and not SKIP_COLLISIONS: + if not delete_collisions and not skip_collisions: raise Exception('Archive folder for {} already exists. Investigate manually and rerun with either --delete-collisions or --skip-collisions') - if DELETE_COLLISIONS: + if delete_collisions: archive_folder = dst.files.exclude(type='osf.trashedfolder').get(name=node_settings.archive_folder_name.replace('/', '-')) logger.info(f'Removing {archive_folder}') archive_folder.delete() - if SKIP_COLLISIONS: + if skip_collisions: complete_archive_target(reg, node_settings.short_name) return cookie = user.get_or_create_cookie().decode() @@ -283,9 +280,9 @@ def get_logs_to_revert(reg): Q(node=reg.registered_from) | (Q(params__source__nid=reg.registered_from._id) | Q(params__destination__nid=reg.registered_from._id))).order_by('-date') -def revert_log_actions(file_tree, reg, obj_cache): +def revert_log_actions(file_tree, reg, obj_cache, permissible_addons): logs_to_revert = get_logs_to_revert(reg) - if len(PERMISSIBLE_ADDONS) > 1: + if len(permissible_addons) > 1: logs_to_revert = logs_to_revert.exclude(action__in=PERMISSIBLE_BLACKLIST) for log in list(logs_to_revert): try: @@ -327,7 +324,7 @@ def revert_log_actions(file_tree, reg, obj_cache): obj_cache.add(file_obj._id) return file_tree -def build_file_tree(reg, node_settings): +def build_file_tree(reg, node_settings, *args, **kwargs): n = reg.registered_from obj_cache = set(n.files.values_list('_id', flat=True)) @@ -344,45 +341,47 @@ def _recurse(file_obj, node): return serialized current_tree = _recurse(node_settings.get_root(), n) - return revert_log_actions(current_tree, reg, obj_cache) + return revert_log_actions(current_tree, reg, obj_cache, *args, **kwargs) -def archive(registration): +def archive(registration, *args, permissible_addons=DEFAULT_PERMISSIBLE_ADDONS, allow_unconfigured=False, **kwargs): for reg in registration.node_and_primary_descendants(): reg.registered_from.creator.get_or_create_cookie() # Allow WB requests if reg.archive_job.status == ARCHIVER_SUCCESS: continue logs_to_revert = reg.registered_from.logs.filter(date__gt=reg.registered_date).exclude(action__in=LOG_WHITELIST) - if len(PERMISSIBLE_ADDONS) == 1: + if len(permissible_addons) == 1: assert not logs_to_revert.exclude(action__in=LOG_GREYLIST).exists(), f'{registration._id}: {reg.registered_from._id} had unexpected unacceptable logs' else: assert not logs_to_revert.exclude(action__in=LOG_GREYLIST).exclude(action__in=PERMISSIBLE_BLACKLIST).exists(), f'{registration._id}: {reg.registered_from._id} had unexpected unacceptable logs' logger.info(f'Preparing to archive {reg._id}') - for short_name in PERMISSIBLE_ADDONS: + for short_name in permissible_addons: node_settings = reg.registered_from.get_addon(short_name) if not hasattr(node_settings, '_get_file_tree'): # Excludes invalid or None-type continue if not node_settings.configured: - if not ALLOW_UNCONFIGURED: + if not allow_unconfigured: raise Exception(f'{reg._id}: {short_name} on {reg.registered_from._id} is not configured. 
If this is permissible, re-run with `--allow-unconfigured`.')
                 continue
             if not reg.archive_job.get_target(short_name) or reg.archive_job.get_target(short_name).status == ARCHIVER_SUCCESS:
                 continue
             if short_name == 'osfstorage':
-                file_tree = build_file_tree(reg, node_settings)
+                file_tree = build_file_tree(reg, node_settings, permissible_addons=permissible_addons)
                 manually_archive(file_tree, reg, node_settings)
                 complete_archive_target(reg, short_name)
             else:
                 assert reg.archiving, f'{reg._id}: Must be `archiving` for WB to copy'
-                perform_wb_copy(reg, node_settings)
+                perform_wb_copy(reg, node_settings, *args, **kwargs)
 
-def archive_registrations():
+def archive_registrations(*args, **kwargs):
     for reg in deepcopy(VERIFIED):
-        archive(reg)
+        archive(reg, *args, **kwargs)
         ARCHIVED.append(reg)
         VERIFIED.remove(reg)
 
-def verify(registration):
+def verify(registration, permissible_addons=DEFAULT_PERMISSIBLE_ADDONS, raise_error=False):
+    maybe_suppress_error = contextlib.suppress(ValidationError) if not raise_error else contextlib.nullcontext(enter_result=False)
+
     for reg in registration.node_and_primary_descendants():
         logger.info(f'Verifying {reg._id}')
         if reg.archive_job.status == ARCHIVER_SUCCESS:
@@ -390,26 +389,41 @@ def verify(registration):
         nonignorable_logs = get_logs_to_revert(reg)
         unacceptable_logs = nonignorable_logs.exclude(action__in=LOG_GREYLIST)
         if unacceptable_logs.exists():
-            if len(PERMISSIBLE_ADDONS) == 1 or unacceptable_logs.exclude(action__in=PERMISSIBLE_BLACKLIST):
-                logger.error('{}: Original node {} has unacceptable logs: {}'.format(
+            if len(permissible_addons) == 1 or unacceptable_logs.exclude(action__in=PERMISSIBLE_BLACKLIST):
+                message = '{}: Original node {} has unacceptable logs: {}'.format(
                     registration._id,
                     reg.registered_from._id,
                     list(unacceptable_logs.values_list('action', flat=True))
-                ))
+                )
+                logger.error(message)
+
+                with maybe_suppress_error:
+                    raise ValidationError(message)
+
             return False
         if nonignorable_logs.filter(action__in=VERIFY_PROVIDER).exists():
             for log in nonignorable_logs.filter(action__in=VERIFY_PROVIDER):
                 for key in ['source', 'destination']:
                     if key in log.params:
                         if log.params[key]['provider'] != 'osfstorage':
-                            logger.error('{}: {} Only OSFS moves and renames are permissible'.format(
+                            message = '{}: {} Only OSFS moves and renames are permissible'.format(
                                 registration._id,
                                 log._id
-                            ))
+                            )
+                            logger.error(message)
+
+                            with maybe_suppress_error:
+                                raise ValidationError(message)
+
                             return False
         addons = reg.registered_from.get_addon_names()
-        if set(addons) - set(PERMISSIBLE_ADDONS | {'wiki'}) != set():
-            logger.error(f'{registration._id}: Original node {reg.registered_from._id} has addons: {addons}')
+        if set(addons) - (set(permissible_addons) | {'wiki'}) != set():
+            message = f'{registration._id}: Original node {reg.registered_from._id} has addons: {addons}'
+            logger.error(message)
+
+            with maybe_suppress_error:
+                raise ValidationError(message)
+
             return False
         if nonignorable_logs.exists():
             logger.info('{}: Original node {} has had revertable file operations'.format(
@@ -423,23 +437,23 @@ def verify(registration):
                 registration._id,
                 reg.registered_from._id,
                 list(nonignorable_logs.values_list('action', flat=True))
            ))
        return True
 
-def verify_registrations(registration_ids):
+def verify_registrations(registration_ids, permissible_addons):
    for r_id in registration_ids:
        reg = Registration.load(r_id)
        if not reg:
            logger.warning(f'Registration {r_id} not found')
        else:
-            if verify(reg):
+            if verify(reg, permissible_addons=permissible_addons):
                VERIFIED.append(reg)
            else:
                SKIPPED.append(reg)
 
 def check(reg):
+    """Check registration status. 
Raise exception if registration stuck."""
     logger.info(f'Checking {reg._id}')
     if reg.is_deleted:
-        logger.info(f'Registration {reg._id} is deleted.')
-        CHECKED_OKAY.append(reg)
-        return
+        return f'Registration {reg._id} is deleted.'
+
     expired_if_before = timezone.now() - ARCHIVE_TIMEOUT_TIMEDELTA
     archive_job = reg.archive_job
     root_job = reg.root.archive_job
@@ -452,14 +466,11 @@ def check(reg):
     if still_archiving and root_job.datetime_initiated < expired_if_before:
         logger.warning(f'Registration {reg._id} is stuck in archiving')
         if verify(reg):
-            logger.info(f'Registration {reg._id} verified recoverable')
-            CHECKED_STUCK_RECOVERABLE.append(reg)
+            raise RegistrationStuckRecoverableException(f'Registration {reg._id} is stuck and verified recoverable')
         else:
-            logger.info(f'Registration {reg._id} verified broken')
-            CHECKED_STUCK_BROKEN.append(reg)
-    else:
-        logger.info(f'Registration {reg._id} is not stuck in archiving')
-        CHECKED_OKAY.append(reg)
+            raise RegistrationStuckBrokenException(f'Registration {reg._id} is stuck and verified broken')
+
+    return f'Registration {reg._id} is not stuck in archiving'
 
 def check_registrations(registration_ids):
     for r_id in registration_ids:
@@ -467,7 +478,16 @@ def check_registrations(registration_ids):
         if not reg:
             logger.warning(f'Registration {r_id} not found')
         else:
-            check(reg)
+            try:
+                status = check(reg)
+                logger.info(status)
+                CHECKED_OKAY.append(reg)
+            except RegistrationStuckRecoverableException as exc:
+                logger.info(str(exc))
+                CHECKED_STUCK_RECOVERABLE.append(reg)
+            except RegistrationStuckBrokenException as exc:
+                logger.info(str(exc))
+                CHECKED_STUCK_BROKEN.append(reg)
 
 def log_results(dry_run):
     if CHECKED_OKAY:
@@ -527,29 +547,31 @@ def add_arguments(self, parser):
         parser.add_argument('--guids', type=str, nargs='+', help='GUIDs of registrations to archive')
 
     def handle(self, *args, **options):
-        global DELETE_COLLISIONS
-        global SKIP_COLLISIONS
-        global ALLOW_UNCONFIGURED
-        DELETE_COLLISIONS = options.get('delete_collisions')
-        SKIP_COLLISIONS = options.get('skip_collisions')
-        ALLOW_UNCONFIGURED = options.get('allow_unconfigured')
-        if DELETE_COLLISIONS and SKIP_COLLISIONS:
+        delete_collisions = options.get('delete_collisions')
+        skip_collisions = options.get('skip_collisions')
+        allow_unconfigured = options.get('allow_unconfigured')
+        if delete_collisions and skip_collisions:
             raise Exception('Cannot specify both delete_collisions and skip_collisions')
         dry_run = options.get('dry_run')
         if not dry_run:
             script_utils.add_file_logger(logger, __file__)
-        addons = options.get('addons', [])
-        if addons:
-            PERMISSIBLE_ADDONS.update(set(addons))
+        addons = set(options.get('addons') or [])
+        addons.update(DEFAULT_PERMISSIBLE_ADDONS)
+
         registration_ids = options.get('guids', [])
 
         if options.get('check', False):
             check_registrations(registration_ids)
         else:
-            verify_registrations(registration_ids)
+            verify_registrations(registration_ids, permissible_addons=addons)
             if not dry_run:
-                archive_registrations()
+                archive_registrations(
+                    permissible_addons=addons,
+                    delete_collisions=delete_collisions,
+                    skip_collisions=skip_collisions,
+                    allow_unconfigured=allow_unconfigured,
+                )
 
         log_results(dry_run)
 
diff --git a/osf/models/registrations.py b/osf/models/registrations.py
index f7b017d9ddf..b92aed1e8e2 100644
--- a/osf/models/registrations.py
+++ b/osf/models/registrations.py
@@ -325,6 +325,11 @@ def archiving(self):
         job = self.archive_job
         return job and not job.done and not job.archive_tree_finished()
 
+    @property
+    def archived(self):
+        job = self.archive_job
+        return 
job and job.done and job.archive_tree_finished() + @property def is_moderated(self): if not self.provider: From 49f479aa54eb8682031aad8d86003f7b785d118b Mon Sep 17 00:00:00 2001 From: ihorsokhanexoft Date: Tue, 22 Apr 2025 23:25:50 +0300 Subject: [PATCH 04/23] [ENG-7798] Parse versioned guid (#11104) ## Purpose `Preprint` inherits from `VersionedGuidMixin` that inherits from `GuidMixin` thus `coerce_guid` tries to fetch guid from `guids` attribute instead of `versioned_guids` ## Changes Added if statement ## Ticket https://openscience.atlassian.net/browse/ENG-7798 --- api_tests/base/test_utils.py | 69 +++++++++++++++++++++++++++++++++++- osf/models/base.py | 2 ++ 2 files changed, 70 insertions(+), 1 deletion(-) diff --git a/api_tests/base/test_utils.py b/api_tests/base/test_utils.py index 51ff6611da0..32429fdf0f7 100644 --- a/api_tests/base/test_utils.py +++ b/api_tests/base/test_utils.py @@ -6,8 +6,11 @@ from rest_framework import fields from rest_framework.exceptions import ValidationError -from api.base import utils as api_utils +from api.base import utils as api_utils +from osf.models.base import coerce_guid, Guid, GuidMixin, OptionalGuidMixin, VersionedGuidMixin, InvalidGuid +from osf_tests.factories import ProjectFactory, PreprintFactory +from tests.test_websitefiles import TestFile from framework.status import push_status_message SessionStore = import_module(django_conf_settings.SESSION_ENGINE).SessionStore @@ -100,3 +103,67 @@ def test_push_status_message_unexpected_error(self, mock_get_session): 'Unexpected Exception from push_status_message when called ' 'from the v2 API with type "error"' ) + + +@pytest.mark.django_db +class TestCoerceGuid: + + def test_guid_instance(self): + project = ProjectFactory() + assert isinstance(project.guids.first(), Guid) + assert coerce_guid(project.guids.first()) == project.guids.first() + + def test_versioned_guid_instance(self): + preprint = PreprintFactory() + assert isinstance(preprint, VersionedGuidMixin) + assert coerce_guid(preprint) == preprint.versioned_guids.first().guid + + def test_guid_mixin_instance(self): + project = ProjectFactory() + assert isinstance(project, GuidMixin) + assert coerce_guid(project._id) == project.guids.first() + + def test_str_guid_instance(self): + project = ProjectFactory() + str_guid = str(project._id) + guid = coerce_guid(str_guid) + assert isinstance(guid, Guid) + assert guid == project.guids.first() + + def test_incorrect_str_guid_instance(self): + incorrect_guid = '12345' + with pytest.raises(InvalidGuid, match='guid does not exist'): + assert coerce_guid(incorrect_guid) + + def test_optional_guid_instance(self): + node = ProjectFactory() + test_file = TestFile( + _path='anid', + name='name', + target=node, + provider='test', + materialized_path='/long/path/to/name', + ) + test_file.save() + test_file.get_guid(create=True) + assert isinstance(test_file, OptionalGuidMixin) + assert coerce_guid(test_file) == test_file.guids.first() + + def test_incorrect_optional_guid_instance(self): + node = ProjectFactory() + test_file = TestFile( + _path='anid', + name='name', + target=node, + provider='test', + materialized_path='/long/path/to/name', + ) + test_file.save() + assert isinstance(test_file, OptionalGuidMixin) + with pytest.raises(InvalidGuid, match='guid does not exist'): + assert coerce_guid(test_file) + + def test_invalid_guid(self): + incorrect_guid = 12345 + with pytest.raises(InvalidGuid, match='cannot coerce'): + assert coerce_guid(incorrect_guid) diff --git a/osf/models/base.py b/osf/models/base.py 
index 4b51544dd15..d2c07a86d9e 100644 --- a/osf/models/base.py +++ b/osf/models/base.py @@ -49,6 +49,8 @@ def generate_object_id(): def coerce_guid(maybe_guid, create_if_needed=False): if isinstance(maybe_guid, Guid): return maybe_guid + if isinstance(maybe_guid, VersionedGuidMixin): + return maybe_guid.versioned_guids.first().guid if isinstance(maybe_guid, GuidMixin): return maybe_guid.guids.first() if isinstance(maybe_guid, OptionalGuidMixin): From 30f818544c14710d76318dd98f1803aed10d76db Mon Sep 17 00:00:00 2001 From: Vlad0n20 <137097005+Vlad0n20@users.noreply.github.com> Date: Tue, 22 Apr 2025 23:37:26 +0300 Subject: [PATCH 05/23] [ENG-7263] Fix/eng 7263 (#11090) ## Purpose Fix bug with citations for a new preprint version. The problem was in the process of creating a version of the preprint, so also added the manage command to fix already created versions ## Changes Fix bug with citations for a new preprint version ## Ticket https://openscience.atlassian.net/browse/ENG-7263 --- ...unclaimed_records_for_preprint_versions.py | 152 ++++++++++++++++++ osf/models/preprint.py | 15 ++ osf/models/user.py | 1 + 3 files changed, 168 insertions(+) create mode 100644 osf/management/commands/fix_unclaimed_records_for_preprint_versions.py diff --git a/osf/management/commands/fix_unclaimed_records_for_preprint_versions.py b/osf/management/commands/fix_unclaimed_records_for_preprint_versions.py new file mode 100644 index 00000000000..655e4b6c039 --- /dev/null +++ b/osf/management/commands/fix_unclaimed_records_for_preprint_versions.py @@ -0,0 +1,152 @@ +import logging + +from django.core.management.base import BaseCommand +from django.apps import apps +from django.db.models import Q + +logger = logging.getLogger(__name__) + + +class Command(BaseCommand): + help = 'Update unclaimed records for preprint versions' + + def add_arguments(self, parser): + parser.add_argument( + '--dry-run', + action='store_true', + dest='dry_run', + help='Run the command without saving changes', + ) + + def handle(self, *args, **options): + dry_run = options.get('dry_run', False) + update_unclaimed_records_for_preprint_versions(dry_run=dry_run) + + +def update_unclaimed_records_for_preprint_versions(dry_run=False): + Preprint = apps.get_model('osf.Preprint') + Guid = apps.get_model('osf.Guid') + OSFUser = apps.get_model('osf.OSFUser') + GuidVersionsThrough = apps.get_model('osf.GuidVersionsThrough') + + preprint_filters = ( + Q(preprintcontributor__user__is_registered=False) | + Q(preprintcontributor__user__date_disabled__isnull=False) + ) + + mode = 'DRY RUN' if dry_run else 'UPDATING' + logger.info(f'Starting {mode} for unclaimed records for preprint versions') + + preprints_count = Preprint.objects.filter( + preprint_filters + ).distinct('versioned_guids__guid').count() + + logger.info(f'Found {preprints_count} preprints with unregistered contributors') + + processed_count = 0 + skipped_count = 0 + updated_count = 0 + + logger.info('-' * 50) + logger.info(f'{mode} MODE') + logger.info('-' * 50) + + for preprint in Preprint.objects.filter( + preprint_filters + ).prefetch_related('_contributors').distinct( + 'versioned_guids__guid' + ): + processed_count += 1 + try: + guid, version = Guid.split_guid(preprint._id) + logger.info(f'[{processed_count}/{preprints_count}] Processing preprint {preprint._id}') + + latest_version_through = GuidVersionsThrough.objects.filter(guid___id=guid).last() + if not latest_version_through: + logger.error(f'No version found for guid {guid}, skipping') + skipped_count += 1 + continue + + 
latest_version_number = latest_version_through.version + unregistered_contributors = preprint.contributor_set.filter(user__is_registered=False) + logger.info(f'Found {unregistered_contributors.count()} unregistered contributors for preprint {preprint._id}') + + for contributor in unregistered_contributors: + try: + records_key_for_current_guid = [key for key in contributor.user.unclaimed_records.keys() if guid in key] + if records_key_for_current_guid: + records_key_for_current_guid.sort( + key=lambda x: int(x.split(Preprint.GUID_VERSION_DELIMITER)[1]), + ) + record_info = contributor.user.unclaimed_records[records_key_for_current_guid[0]] + for current_version in range(1, int(latest_version_number) + 1): + preprint_id = f'{guid}{Preprint.GUID_VERSION_DELIMITER}{current_version}' + if preprint_id not in contributor.user.unclaimed_records.keys(): + if not dry_run: + try: + preprint_obj = Preprint.load(preprint_id) + referrer = OSFUser.load(record_info['referrer_id']) + + if not preprint_obj: + logger.error(f'Could not load preprint {preprint_id}, skipping') + continue + + if not referrer: + logger.error(f'Could not load referrer {record_info["referrer_id"]}, skipping') + continue + + logger.info(f'Adding unclaimed record for {preprint_id} for user {contributor.user._id}') + contributor.user.unclaimed_records[preprint_id] = contributor.user.add_unclaimed_record( + claim_origin=preprint_obj, + referrer=referrer, + given_name=record_info.get('name', None), + email=record_info.get('email', None), + provided_pid=preprint_id, + ) + contributor.user.save() + updated_count += 1 + logger.info(f'Successfully saved unclaimed record for {preprint_id}') + except Exception as e: + logger.error(f'Error adding unclaimed record for {preprint_id}: {str(e)}') + else: + logger.info(f'[DRY RUN] Would add unclaimed record for {preprint_id} for user {contributor.user._id}') + updated_count += 1 + else: + try: + all_versions = [guid.referent for guid in GuidVersionsThrough.objects.filter(guid___id=guid)] + logger.info(f'Found {len(all_versions)} versions for preprint with guid {guid}') + + for current_preprint in all_versions: + preprint_id = current_preprint._id + if preprint_id not in contributor.user.unclaimed_records.keys(): + if not dry_run: + try: + logger.info(f'Adding unclaimed record for {preprint_id} for user {contributor.user._id}') + contributor.user.unclaimed_records[preprint_id] = contributor.user.add_unclaimed_record( + claim_origin=current_preprint, + referrer=current_preprint.creator, + given_name=contributor.user.fullname, + email=contributor.user.username, + provided_pid=preprint_id, + ) + contributor.user.save() + updated_count += 1 + logger.info(f'Successfully saved unclaimed record for {preprint_id}') + except Exception as e: + logger.error(f'Error adding unclaimed record for {preprint_id}: {str(e)}') + else: + logger.info(f'[DRY RUN] Would add unclaimed record for {preprint_id} for user {contributor.user._id}') + updated_count += 1 + except Exception as e: + logger.error(f'Error processing versions for guid {guid}: {str(e)}') + except Exception as e: + logger.error(f'Error processing contributor {contributor.id}: {str(e)}') + + except Exception as e: + logger.error(f'Unexpected error processing preprint {preprint.id}: {str(e)}') + skipped_count += 1 + + if dry_run: + logger.info(f'Processed: {processed_count}, Would update: {updated_count}, Skipped: {skipped_count}') + else: + logger.info(f'Processed: {processed_count}, Updated: {updated_count}, Skipped: {skipped_count}') diff --git 
a/osf/models/preprint.py b/osf/models/preprint.py index 162ab8b00a8..30fdb19df04 100644 --- a/osf/models/preprint.py +++ b/osf/models/preprint.py @@ -463,6 +463,21 @@ def create_version(cls, create_from_guid, auth): sentry.log_exception(e) sentry.log_message(f'Contributor was not added to new preprint version due to error: ' f'[preprint={preprint._id}, user={contributor.user._id}]') + + # Add new version record for unregistered contributors + for contributor in preprint.contributor_set.filter(Q(user__is_registered=False) | Q(user__date_disabled__isnull=False)): + try: + contributor.user.add_unclaimed_record( + claim_origin=preprint, + referrer=auth.user, + email=contributor.user.email, + given_name=contributor.user.fullname, + ) + except ValidationError as e: + sentry.log_exception(e) + sentry.log_message(f'Unregistered contributor was not added to new preprint version due to error: ' + f'[preprint={preprint._id}, user={contributor.user._id}]') + # Add affiliated institutions for institution in latest_version.affiliated_institutions.all(): preprint.add_affiliated_institution(institution, auth.user, ignore_user_affiliation=True) diff --git a/osf/models/user.py b/osf/models/user.py index 8d3a844c711..fa1d4ec254a 100644 --- a/osf/models/user.py +++ b/osf/models/user.py @@ -71,6 +71,7 @@ MAX_QUICKFILES_MERGE_RENAME_ATTEMPTS = 1000 + def get_default_mailing_lists(): return {'Open Science Framework Help': True} From 3738135bf9ecc35ca94e1c2f2071598353dab5c0 Mon Sep 17 00:00:00 2001 From: Ostap Zherebetskyi Date: Thu, 3 Apr 2025 14:04:40 +0300 Subject: [PATCH 06/23] Add registration callback endpoint and tests --- addons/base/views.py | 6 +- api/registrations/urls.py | 1 + api/registrations/views.py | 53 +++++++++++- .../views/test_regisatration_callbacks.py | 82 +++++++++++++++++++ osf/models/node.py | 7 ++ 5 files changed, 143 insertions(+), 6 deletions(-) create mode 100644 api_tests/registrations/views/test_regisatration_callbacks.py diff --git a/addons/base/views.py b/addons/base/views.py index 6f22c71f3e3..a6c90860b98 100644 --- a/addons/base/views.py +++ b/addons/base/views.py @@ -431,11 +431,7 @@ def _enqueue_metrics(file_version, file_node, action, auth, from_mfr=False): def _construct_payload(auth, resource, credentials, waterbutler_settings): if isinstance(resource, Registration): - callback_url = resource.api_url_for( - 'registration_callbacks', - _absolute=True, - _internal=True - ) + callback_url = resource.callbacks_url else: callback_url = resource.api_url_for( 'create_waterbutler_log', diff --git a/api/registrations/urls.py b/api/registrations/urls.py index 66e5175b05b..232be823bb9 100644 --- a/api/registrations/urls.py +++ b/api/registrations/urls.py @@ -13,6 +13,7 @@ re_path(r'^(?P\w+)/$', views.RegistrationDetail.as_view(), name=views.RegistrationDetail.view_name), re_path(r'^(?P\w+)/bibliographic_contributors/$', views.RegistrationBibliographicContributorsList.as_view(), name=views.RegistrationBibliographicContributorsList.view_name), re_path(r'^(?P\w+)/cedar_metadata_records/$', views.RegistrationCedarMetadataRecordsList.as_view(), name=views.RegistrationCedarMetadataRecordsList.view_name), + re_path(r'^(?P\w+)/callbacks/$', views.RegistrationCallbackView.as_view(), name=views.RegistrationCallbackView.view_name), re_path(r'^(?P\w+)/children/$', views.RegistrationChildrenList.as_view(), name=views.RegistrationChildrenList.view_name), re_path(r'^(?P\w+)/comments/$', views.RegistrationCommentsList.as_view(), name=views.RegistrationCommentsList.view_name), 
re_path(r'^(?P\w+)/contributors/$', views.RegistrationContributorsList.as_view(), name=views.RegistrationContributorsList.view_name), diff --git a/api/registrations/views.py b/api/registrations/views.py index 8254ea69edf..45fd1f20ba0 100644 --- a/api/registrations/views.py +++ b/api/registrations/views.py @@ -1,7 +1,13 @@ -from rest_framework import generics, mixins, permissions as drf_permissions +from rest_framework import generics, mixins, permissions as drf_permissions, status from rest_framework.exceptions import ValidationError, NotFound, PermissionDenied +from rest_framework.response import Response +from framework.exceptions import HTTPError from framework.auth.oauth_scopes import CoreScopes +from addons.base.views import DOWNLOAD_ACTIONS +from website.archiver import signals, ARCHIVER_NETWORK_ERROR, ARCHIVER_SUCCESS, ARCHIVER_FAILURE +from website.project import signals as project_signals + from osf.models import Registration, OSFUser, RegistrationProvider, OutcomeArtifact, CedarMetadataRecord from osf.utils.permissions import WRITE_NODE from osf.utils.workflows import ApprovalStates @@ -28,6 +34,7 @@ JSONAPIMultipleRelationshipsParser, JSONAPIRelationshipParserForRegularJSON, JSONAPIMultipleRelationshipsParserForRegularJSON, + HMACSignedParser, ) from api.base.utils import ( get_user_auth, @@ -1040,3 +1047,47 @@ def get_default_queryset(self): def get_queryset(self): return self.get_queryset_from_request() + + +class RegistrationCallbackView(JSONAPIBaseView, generics.UpdateAPIView, RegistrationMixin): + permission_classes = [drf_permissions.AllowAny] + + view_category = 'registrations' + view_name = 'registration-callbacks' + + parser_classes = [HMACSignedParser] + + def update(self, request, *args, **kwargs): + registration = self.get_node() + + try: + payload = request.data + if payload.get('action', None) in DOWNLOAD_ACTIONS: + return Response({'status': 'success'}, status=status.HTTP_200_OK) + errors = payload.get('errors') + src_provider = payload['source']['provider'] + if errors: + registration.archive_job.update_target( + src_provider, + ARCHIVER_FAILURE, + errors=errors, + ) + else: + # Dataverse requires two seperate targets, one + # for draft files and one for published files + if src_provider == 'dataverse': + src_provider += '-' + (payload['destination']['name'].split(' ')[-1].lstrip('(').rstrip(')').strip()) + registration.archive_job.update_target( + src_provider, + ARCHIVER_SUCCESS, + ) + project_signals.archive_callback.send(registration) + return Response(status=status.HTTP_200_OK) + except HTTPError as e: + registration.archive_status = ARCHIVER_NETWORK_ERROR + registration.save() + signals.archive_fail.send( + registration, + errors=[str(e)] + ) + return Response(status=status.HTTP_200_OK) diff --git a/api_tests/registrations/views/test_regisatration_callbacks.py b/api_tests/registrations/views/test_regisatration_callbacks.py new file mode 100644 index 00000000000..d559fbf14b7 --- /dev/null +++ b/api_tests/registrations/views/test_regisatration_callbacks.py @@ -0,0 +1,82 @@ +import copy +import time +import pytest + +from api.base.settings.defaults import API_BASE +from osf_tests.factories import RegistrationFactory +from framework.auth import signing + + +@pytest.mark.django_db +class TestRegistrationCallbacks: + + @pytest.fixture() + def registration(self): + registration = RegistrationFactory() + return registration + + @pytest.fixture() + def url(self, registration): + return f'/{API_BASE}registrations/{registration._id}/callbacks/' + + @pytest.fixture() + 
def payload(self): + return { + "action": "copy", + "destination": { + "name": "Archive of OSF Storage", + }, + "errors": None, + "source": { + "provider": "osfstorage", + }, + "time": time.time() + 1000 + } + + def sign_payload(self, payload): + message, signature = signing.default_signer.sign_payload(payload) + return { + 'payload': message, + 'signature': signature, + } + + def test_registration_callback(self, app, payload, url): + data = self.sign_payload(payload) + res = app.put_json(url, data) + assert res.status_code == 200 + + def test_signature_expired(self, app, payload, url): + payload['time'] = time.time() - 100 + data = self.sign_payload(payload) + res = app.put_json(url, data, expect_errors=True) + assert res.status_code == 400 + assert res.json['errors'][0]['detail'] == 'Signature has expired' + + def test_bad_signature(self, app, payload, url): + data = self.sign_payload(payload) + data['signature'] = '1234' + res = app.put_json(url, data, expect_errors=True) + assert res.status_code == 401 + assert res.json['errors'][0]['detail'] == 'Authentication credentials were not provided.' + + def test_invalid_payload(self, app, payload, url): + payload1 = copy.deepcopy(payload) + del payload1['time'] + data = self.sign_payload(payload1) + res = app.put_json(url, data, expect_errors=True) + assert res.status_code == 400 + assert res.json['errors'][0]['detail'] == 'Invalid Payload' + + payload2 = copy.deepcopy(payload) + data = self.sign_payload(payload2) + del data['signature'] + res = app.put_json(url, data, expect_errors=True) + assert res.status_code == 400 + assert res.json['errors'][0]['detail'] == 'Invalid Payload' + + payload3 = copy.deepcopy(payload) + data = self.sign_payload(payload3) + del data['payload'] + res = app.put_json(url, data, expect_errors=True) + assert res.status_code == 400 + assert res.json['errors'][0]['detail'] == 'Invalid Payload' diff --git a/osf/models/node.py b/osf/models/node.py index 47bf04d4f59..5c60e3f0a3c 100644 --- a/osf/models/node.py +++ b/osf/models/node.py @@ -612,6 +612,10 @@ def institutions_url(self): def institutions_relationship_url(self): return self.absolute_api_v2_url + 'relationships/institutions/' + @property + def callbacks_url(self): + return self.absolute_api_v2_url + 'callbacks/' + # For Comment API compatibility @property def target_type(self): @@ -661,6 +665,9 @@ def web_url_for(self, view_name, _absolute=False, _guid=False, *args, **kwargs): def api_url_for(self, view_name, _absolute=False, *args, **kwargs): return api_url_for(view_name, pid=self._primary_key, _absolute=_absolute, *args, **kwargs) + def api_v2_url_for(self, path_str, params=None, **kwargs): + return api_url_for(path_str, params=params, **kwargs) + @property def project_or_component(self): # The distinction is drawn based on whether something has a parent node, rather than by category From 418c044386ba951e3b9dfd970fd3507945fe256e Mon Sep 17 00:00:00 2001 From: Ostap Zherebetskyi Date: Thu, 3 Apr 2025 14:14:28 +0300 Subject: [PATCH 07/23] add-trailing-comma double-quote-string-fixer --- api/registrations/views.py | 2 +- .../views/test_regisatration_callbacks.py | 14 +++++++------- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/api/registrations/views.py b/api/registrations/views.py index 45fd1f20ba0..a8d10d0602b 100644 --- a/api/registrations/views.py +++ b/api/registrations/views.py @@ -1088,6 +1088,6 @@ def update(self, request, *args, **kwargs): registration.save() signals.archive_fail.send( registration, - errors=[str(e)] + errors=[str(e)], 
) return Response(status=status.HTTP_200_OK) diff --git a/api_tests/registrations/views/test_regisatration_callbacks.py b/api_tests/registrations/views/test_regisatration_callbacks.py index d559fbf14b7..35d65d013b6 100644 --- a/api_tests/registrations/views/test_regisatration_callbacks.py +++ b/api_tests/registrations/views/test_regisatration_callbacks.py @@ -22,15 +22,15 @@ def url(self, registration): @pytest.fixture() def payload(self): return { - "action": "copy", - "destination": { - "name": "Archive of OSF Storage", + 'action': 'copy', + 'destination': { + 'name': 'Archive of OSF Storage', }, - "errors": None, - "source": { - "provider": "osfstorage", + 'errors': None, + 'source': { + 'provider': 'osfstorage', }, - "time": time.time() + 1000 + 'time': time.time() + 1000 } def sign_payload(self, payload): From 24f4bd69c46ca8c85f7d4bfc4992f1d21e78db36 Mon Sep 17 00:00:00 2001 From: Ostap Zherebetskyi Date: Thu, 3 Apr 2025 14:39:52 +0300 Subject: [PATCH 08/23] fix test_view_classes_have_minimal_set_of_permissions_classes --- api_tests/base/test_views.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/api_tests/base/test_views.py b/api_tests/base/test_views.py index 2db3a4b65b2..b09df8d753c 100644 --- a/api_tests/base/test_views.py +++ b/api_tests/base/test_views.py @@ -18,6 +18,7 @@ MetricsOpenapiView, ) from api.users.views import ClaimUser, ResetPassword, ExternalLoginConfirmEmailView, ExternalLogin +from api.registrations.views import RegistrationCallbackView from api.wb.views import MoveFileMetadataView, CopyFileMetadataView from rest_framework.permissions import IsAuthenticatedOrReadOnly, IsAuthenticated from api.base.permissions import TokenHasScope @@ -63,6 +64,7 @@ def setUp(self): ResetPassword, ExternalLoginConfirmEmailView, ExternalLogin, + RegistrationCallbackView, ] def test_root_returns_200(self): From 15d71755b3c25161683eab7a7327a04cfdaa0bf9 Mon Sep 17 00:00:00 2001 From: Ostap Zherebetskyi Date: Thu, 3 Apr 2025 16:47:22 +0300 Subject: [PATCH 09/23] remove old Flask registration_callback code and related decorators --- osf_tests/test_archiver.py | 17 ---------------- website/archiver/decorators.py | 25 ----------------------- website/project/decorators.py | 19 ----------------- website/project/views/register.py | 34 +------------------------------ website/routes.py | 8 -------- 5 files changed, 1 insertion(+), 102 deletions(-) delete mode 100644 website/archiver/decorators.py diff --git a/osf_tests/test_archiver.py b/osf_tests/test_archiver.py index 3855d169acb..59c178b839d 100644 --- a/osf_tests/test_archiver.py +++ b/osf_tests/test_archiver.py @@ -22,7 +22,6 @@ from website.app import * # noqa: F403 from website.archiver import listeners from website.archiver.tasks import * # noqa: F403 -from website.archiver.decorators import fail_archive_on_error from osf.models import Guid, RegistrationSchema, Registration from osf.models.archive import ArchiveTarget, ArchiveJob @@ -1111,22 +1110,6 @@ def test_find_failed_registrations(self): assert pk not in failed -class TestArchiverDecorators(ArchiverTestCase): - - @mock.patch('website.archiver.signals.archive_fail.send') - def test_fail_archive_on_error(self, mock_fail): - e = HTTPError(418) - - def error(*args, **kwargs): - raise e - - func = fail_archive_on_error(error) - func(node=self.dst) - mock_fail.assert_called_with( - self.dst, - errors=[str(e)] - ) - class TestArchiverBehavior(OsfTestCase): @mock.patch('osf.models.AbstractNode.update_search') diff --git a/website/archiver/decorators.py 
b/website/archiver/decorators.py deleted file mode 100644 index 0d6f46bfb37..00000000000 --- a/website/archiver/decorators.py +++ /dev/null @@ -1,25 +0,0 @@ -import functools - -from framework.exceptions import HTTPError - -from website.project.decorators import _inject_nodes -from website.archiver import ARCHIVER_NETWORK_ERROR -from website.archiver import signals - - -def fail_archive_on_error(func): - - @functools.wraps(func) - def wrapped(*args, **kwargs): - try: - return func(*args, **kwargs) - except HTTPError as e: - _inject_nodes(kwargs) - registration = kwargs['node'] - registration.archive_status = ARCHIVER_NETWORK_ERROR - registration.save() - signals.archive_fail.send( - registration, - errors=[str(e)] - ) - return wrapped diff --git a/website/project/decorators.py b/website/project/decorators.py index 0e165146250..2d60be5359b 100644 --- a/website/project/decorators.py +++ b/website/project/decorators.py @@ -173,25 +173,6 @@ def wrapped(*args, **kwargs): return wrapped -def must_be_registration(func): - - @functools.wraps(func) - def wrapped(*args, **kwargs): - _inject_nodes(kwargs) - node = kwargs['node'] - - if not node.is_registration: - raise HTTPError( - http_status.HTTP_400_BAD_REQUEST, - data={ - 'message_short': 'Registered Nodes only', - 'message_long': 'This view is restricted to registered Nodes only', - } - ) - return func(*args, **kwargs) - - return wrapped - def check_can_download_preprint_file(user, node): """View helper that returns whether a given user can download unpublished preprint files. diff --git a/website/project/views/register.py b/website/project/views/register.py index 11a5da7f53c..265fda1edea 100644 --- a/website/project/views/register.py +++ b/website/project/views/register.py @@ -7,17 +7,12 @@ from framework.exceptions import HTTPError from framework.flask import redirect # VOL-aware redirect -from framework.auth.decorators import must_be_signed - -from website.archiver import ARCHIVER_SUCCESS, ARCHIVER_FAILURE - -from addons.base.views import DOWNLOAD_ACTIONS from website import settings from osf.exceptions import NodeStateError from website.project.decorators import ( must_be_valid_project, must_be_contributor_or_public, must_have_permission, must_be_contributor_and_not_group_member, - must_not_be_registration, must_be_registration, + must_not_be_registration, must_not_be_retracted_registration ) from osf import features @@ -26,12 +21,10 @@ from osf.utils.permissions import ADMIN from website import language from website.ember_osf_web.decorators import ember_flag_is_active -from website.project import signals as project_signals from website.project.metadata.schemas import _id_to_name from website import util from website.project.metadata.utils import serialize_meta_schema from website.project.model import has_anonymous_link -from website.archiver.decorators import fail_archive_on_error from .node import _view_project from api.waffle.utils import flag_is_active @@ -228,28 +221,3 @@ def get_referent_by_identifier(category, value): if identifier.referent.url: return redirect(identifier.referent.url) raise HTTPError(http_status.HTTP_404_NOT_FOUND) - -@fail_archive_on_error -@must_be_signed -@must_be_registration -def registration_callbacks(node, payload, *args, **kwargs): - if payload.get('action', None) in DOWNLOAD_ACTIONS: - return {'status': 'success'} - errors = payload.get('errors') - src_provider = payload['source']['provider'] - if errors: - node.archive_job.update_target( - src_provider, - ARCHIVER_FAILURE, - errors=errors, - ) - else: - # 
Dataverse requires two seperate targets, one
-        # for draft files and one for published files
-        if src_provider == 'dataverse':
-            src_provider += '-' + (payload['destination']['name'].split(' ')[-1].lstrip('(').rstrip(')').strip())
-        node.archive_job.update_target(
-            src_provider,
-            ARCHIVER_SUCCESS,
-        )
-    project_signals.archive_callback.send(node)
diff --git a/website/routes.py b/website/routes.py
index f6144b09f50..7b0f325fa9f 100644
--- a/website/routes.py
+++ b/website/routes.py
@@ -1694,14 +1694,6 @@ def make_url_map(app):
                 addon_views.create_waterbutler_log,
                 json_renderer,
             ),
-            Rule(
-                [
-                    '/registration/<pid>/callbacks/',
-                ],
-                'put',
-                project_views.register.registration_callbacks,
-                json_renderer,
-            ),
             Rule(
                 '/settings/addons/',
                 'post',
From 1bb79cf01e5bf7f73f88cf94ade25167d9d18f03 Mon Sep 17 00:00:00 2001
From: Vlad0n20 <137097005+Vlad0n20@users.noreply.github.com>
Date: Thu, 24 Apr 2025 15:44:02 +0300
Subject: [PATCH 10/23] [ENG-7503] Fix/eng 7503 (#11092)

## Purpose
Fix preprint updating

## Changes
Add a management command to fix why_no_data and has_data_links for broken preprints, caused by an unknown FE bug (Futa described it in the comments on the ticket)

## Ticket
https://openscience.atlassian.net/browse/ENG-7503
---
 admin/preprints/urls.py                       |   1 +
 admin/preprints/views.py                      |  16 ++
 admin/templates/preprints/fix_editing.html    |  21 +++
 admin/templates/preprints/preprint.html       |   1 +
 ...reprints_has_data_links_and_why_no_data.py | 109 ++++++++++++
 ...reprints_has_data_links_and_why_no_data.py | 157 ++++++++++++++++++
 6 files changed, 305 insertions(+)
 create mode 100644 admin/templates/preprints/fix_editing.html
 create mode 100644 osf/management/commands/fix_preprints_has_data_links_and_why_no_data.py
 create mode 100644 osf_tests/management_commands/test_fix_preprints_has_data_links_and_why_no_data.py
diff --git a/admin/preprints/urls.py b/admin/preprints/urls.py
index f0a439f9722..2b0e573f953 100644
--- a/admin/preprints/urls.py
+++ b/admin/preprints/urls.py
@@ -15,6 +15,7 @@
     re_path(r'^(?P<guid>\w+)/reindex_share_preprint/$', views.PreprintReindexShare.as_view(), name='reindex-share-preprint'),
     re_path(r'^(?P<guid>\w+)/remove_user/(?P<user_id>[a-z0-9]+)/$', views.PreprintRemoveContributorView.as_view(), name='remove-user'),
     re_path(r'^(?P<guid>\w+)/make_private/$', views.PreprintMakePrivate.as_view(), name='make-private'),
+    re_path(r'^(?P<guid>\w+)/fix_editing/$', views.PreprintFixEditing.as_view(), name='fix-editing'),
     re_path(r'^(?P<guid>\w+)/make_public/$', views.PreprintMakePublic.as_view(), name='make-public'),
     re_path(r'^(?P<guid>\w+)/remove/$', views.PreprintDeleteView.as_view(), name='remove'),
     re_path(r'^(?P<guid>\w+)/restore/$', views.PreprintDeleteView.as_view(), name='restore'),
diff --git a/admin/preprints/views.py b/admin/preprints/views.py
index a936c27582e..b9b3fd3dd59 100644
--- a/admin/preprints/views.py
+++ b/admin/preprints/views.py
@@ -22,6 +22,7 @@

 from osf.exceptions import PreprintStateError

+from osf.management.commands.fix_preprints_has_data_links_and_why_no_data import process_wrong_why_not_data_preprints
 from osf.models import (
     SpamStatus,
     Preprint,
@@ -525,6 +526,21 @@ def post(self, request, *args, **kwargs):
         return redirect(self.get_success_url())


+class PreprintFixEditing(PreprintMixin, View):
+    """ Allows an authorized user to manually fix the why_no_data field.
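+    Reuses process_wrong_why_not_data_preprints from the fix_preprints_has_data_links_and_why_no_data management command, scoped to this preprint.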
+ """ + permission_required = 'osf.change_node' + + def post(self, request, *args, **kwargs): + preprint = self.get_object() + process_wrong_why_not_data_preprints( + version_guid=preprint._id, + dry_run=False, + executing_through_command=False, + ) + + return redirect(self.get_success_url()) + class PreprintMakePublic(PreprintMixin, View): """ Allows an authorized user to manually make a private preprint public. diff --git a/admin/templates/preprints/fix_editing.html b/admin/templates/preprints/fix_editing.html new file mode 100644 index 00000000000..84c6e3cdd99 --- /dev/null +++ b/admin/templates/preprints/fix_editing.html @@ -0,0 +1,21 @@ +{% if perms.osf.change_node %} + + Fix why not data + + +{% endif %} diff --git a/admin/templates/preprints/preprint.html b/admin/templates/preprints/preprint.html index 2763d3d35f1..b88333311ca 100644 --- a/admin/templates/preprints/preprint.html +++ b/admin/templates/preprints/preprint.html @@ -26,6 +26,7 @@ {% include "preprints/make_private.html" with preprint=preprint %} {% include "preprints/make_public.html" with preprint=preprint %} {% include "preprints/make_published.html" with preprint=preprint %} + {% include "preprints/fix_editing.html" with preprint=preprint %} diff --git a/osf/management/commands/fix_preprints_has_data_links_and_why_no_data.py b/osf/management/commands/fix_preprints_has_data_links_and_why_no_data.py new file mode 100644 index 00000000000..84997c1fbe9 --- /dev/null +++ b/osf/management/commands/fix_preprints_has_data_links_and_why_no_data.py @@ -0,0 +1,109 @@ +from django.core.management.base import BaseCommand +from django.db.models import Q +from osf.models import Preprint, Guid +import logging + +logger = logging.getLogger(__name__) + + +def process_wrong_why_not_data_preprints( + version_guid: str | None, + dry_run: bool, + executing_through_command: bool = True, + command_obj: BaseCommand = None +): + through_command_constrain = executing_through_command and command_obj + why_no_data_filters = Q(why_no_data__isnull=False) & ~Q(why_no_data='') + + if version_guid: + base_guid_str, version = Guid.split_guid(version_guid) + preprints = Preprint.objects.filter( + versioned_guids__guid___id=base_guid_str, + versioned_guids__version=version + ) + if not preprints: + no_preprint_message = f'No preprint found with version_guid: {version_guid}' + logger.error(no_preprint_message) + if through_command_constrain: + command_obj.stdout.write(command_obj.style.ERROR(no_preprint_message)) + return + if preprints[0].has_data_links != 'no' and not preprints[0].why_no_data: + correct_behavior_message = f'Correct behavior for {preprints[0]._id} has_data_links={preprints[0].has_data_links} why_no_data={preprints[0].why_no_data}' + if through_command_constrain: + command_obj.stdout.write(correct_behavior_message) + return + + else: + preprints = Preprint.objects.filter( + ~Q(has_data_links='no') & why_no_data_filters + ) + + total = preprints.count() + logger.info(f'Found {total} preprints to process') + if through_command_constrain: + command_obj.stdout.write(f'Found {total} preprints to process') + + processed = 0 + errors = 0 + + for preprint in preprints: + try: + logger.info(f'Processing preprint {preprint._id}') + if through_command_constrain: + command_obj.stdout.write(f'Processing preprint {preprint._id} ({processed + 1}/{total})') + + if not dry_run: + preprint.why_no_data = '' + preprint.save() + logger.info(f'Updated preprint {preprint._id}') + else: + logger.info( + f'Would update preprint {preprint._id} (dry run), 
{preprint.has_data_links=}, {preprint.why_no_data=}' + ) + + processed += 1 + except Exception as e: + errors += 1 + logger.error(f'Error processing preprint {preprint._id}: {str(e)}') + if through_command_constrain: + command_obj.stdout.write(command_obj.style.ERROR(f'Error processing preprint {preprint._id}: {str(e)}')) + continue + + logger.info(f'Completed processing {processed} preprints with {errors} errors') + if through_command_constrain: + command_obj.stdout.write( + command_obj.style.SUCCESS( + f'Completed processing {processed} preprints with {errors} errors' + ) + ) + + +class Command(BaseCommand): + help = 'Fix preprints has_data_links and why_no_data' + + def add_arguments(self, parser): + parser.add_argument( + '--dry-run', + action='store_true', + help='Run without making changes', + ) + parser.add_argument( + '--guid', + type=str, + help='Version GUID to process (e.g. awgxb_v1, kupen_v4)', + ) + + def handle(self, *args, **options): + dry_run = options.get('dry_run', False) + version_guid = options.get('guid') + + if dry_run: + logger.info('Running in dry-run mode - no changes will be made') + self.stdout.write('Running in dry-run mode - no changes will be made') + + process_wrong_why_not_data_preprints( + version_guid=version_guid, + dry_run=dry_run, + executing_through_command=True, + command_obj=self + ) diff --git a/osf_tests/management_commands/test_fix_preprints_has_data_links_and_why_no_data.py b/osf_tests/management_commands/test_fix_preprints_has_data_links_and_why_no_data.py new file mode 100644 index 00000000000..878d610bde3 --- /dev/null +++ b/osf_tests/management_commands/test_fix_preprints_has_data_links_and_why_no_data.py @@ -0,0 +1,157 @@ +import pytest +from unittest import mock + +from django.core.management import call_command +from osf_tests.factories import PreprintFactory, PreprintProviderFactory + + +@pytest.mark.django_db +class TestFixPreprintsHasDataLinksAndWhyNoData: + + @pytest.fixture() + def preprint_not_no_with_why_no_data(self): + preprint = PreprintFactory() + preprint.has_data_links = 'available' + preprint.why_no_data = 'This should be cleared' + preprint.save() + return preprint + + @pytest.fixture() + def preprint_no_with_why_no_data(self): + preprint = PreprintFactory() + preprint.has_data_links = 'no' + preprint.why_no_data = 'Valid reason' + preprint.save() + return preprint + + @pytest.fixture() + def preprint_not_applicable_with_why_no_data(self): + preprint = PreprintFactory() + preprint.has_data_links = 'not_applicable' + preprint.why_no_data = 'This should be cleared' + preprint.save() + return preprint + + def test_fix_preprints_has_data_links_and_why_no_data( + self, preprint_not_no_with_why_no_data, preprint_no_with_why_no_data, preprint_not_applicable_with_why_no_data + ): + call_command('fix_preprints_has_data_links_and_why_no_data') + + preprint_not_no_with_why_no_data.refresh_from_db() + preprint_no_with_why_no_data.refresh_from_db() + preprint_not_applicable_with_why_no_data.refresh_from_db() + + assert preprint_not_no_with_why_no_data.why_no_data == '' + assert preprint_not_applicable_with_why_no_data.why_no_data == '' + + assert preprint_no_with_why_no_data.why_no_data == 'Valid reason' + + def test_dry_run_mode(self, preprint_not_no_with_why_no_data): + call_command('fix_preprints_has_data_links_and_why_no_data', '--dry-run') + + preprint_not_no_with_why_no_data.refresh_from_db() + assert preprint_not_no_with_why_no_data.why_no_data == 'This should be cleared' + + def test_specific_guid(self): + + preprint1 = 
PreprintFactory()
+        preprint1.has_data_links = 'available'
+        preprint1.why_no_data = 'This should be cleared'
+        preprint1.save()
+
+        preprint2 = PreprintFactory()
+        preprint2.has_data_links = 'available'
+        preprint2.why_no_data = 'This should remain'
+        preprint2.save()
+
+        call_command('fix_preprints_has_data_links_and_why_no_data', '--guid', f'{preprint1._id}')
+
+        preprint1.refresh_from_db()
+        preprint2.refresh_from_db()
+
+        assert preprint1.why_no_data == ''
+        assert preprint2.why_no_data == 'This should remain'
+
+    def test_no_action_for_correct_preprints(self):
+        preprint = PreprintFactory()
+        preprint.has_data_links = 'available'
+        preprint.why_no_data = ''
+        preprint.save()
+
+        with mock.patch('osf.models.Guid.split_guid', return_value=(preprint._id, 1)):
+            call_command('fix_preprints_has_data_links_and_why_no_data', '--guid', f'{preprint._id}_v1')
+
+        preprint.refresh_from_db()
+
+        assert preprint.has_data_links == 'available'
+        assert preprint.why_no_data == ''
+
+    def test_preprints_with_null_has_data_links(self):
+        preprint = PreprintFactory()
+        preprint.has_data_links = None
+        preprint.why_no_data = 'Should be cleared for null has_data_links'
+        preprint.save()
+
+        call_command('fix_preprints_has_data_links_and_why_no_data')
+
+        preprint.refresh_from_db()
+        assert preprint.why_no_data == ''
+
+    def test_preprints_different_providers(self):
+        provider1 = PreprintProviderFactory()
+        provider2 = PreprintProviderFactory()
+
+        preprint1 = PreprintFactory(provider=provider1)
+        preprint1.has_data_links = 'available'
+        preprint1.why_no_data = 'Should be cleared (provider 1)'
+        preprint1.save()
+
+        preprint2 = PreprintFactory(provider=provider2)
+        preprint2.has_data_links = 'not_applicable'
+        preprint2.why_no_data = 'Should be cleared (provider 2)'
+        preprint2.save()
+
+        call_command('fix_preprints_has_data_links_and_why_no_data')
+
+        preprint1.refresh_from_db()
+        preprint2.refresh_from_db()
+
+        assert preprint1.why_no_data == ''
+        assert preprint2.why_no_data == ''
+
+    def test_preprints_with_data_links(self):
+        preprint = PreprintFactory()
+        preprint.has_data_links = 'available'
+        preprint.data_links = ['https://example.com/dataset123']
+        preprint.why_no_data = 'This contradicts having data links'
+        preprint.save()
+
+        call_command('fix_preprints_has_data_links_and_why_no_data')
+
+        preprint.refresh_from_db()
+        assert preprint.why_no_data == ''
+        assert preprint.data_links == ['https://example.com/dataset123']
+
+    def test_error_handling(self):
+        preprint1 = PreprintFactory()
+        preprint1.has_data_links = 'available'
+        preprint1.why_no_data = 'Should be cleared'
+        preprint1.save()
+
+        preprint2 = PreprintFactory()
+        preprint2.has_data_links = 'available'
+        preprint2.why_no_data = 'Should be cleared too'
+        preprint2.save()
+
+        preprint3 = PreprintFactory()
+        preprint3.has_data_links = 'available'
+        preprint3.why_no_data = 'Should also be cleared'
+        preprint3.save()
+
+        call_command('fix_preprints_has_data_links_and_why_no_data')
+
+        preprint1.refresh_from_db()
+        preprint3.refresh_from_db()
+
+        assert preprint1.why_no_data == ''
+        assert preprint3.why_no_data == ''
From 94ad504a6b84582b91fb6f9a1027f26b158043cb Mon Sep 17 00:00:00 2001
From: abram axel booth
Date: Thu, 24 Apr 2025 09:26:00 -0400
Subject: [PATCH 11/23] delete sharev2 push [ENG-7387] (#11032)

clean up code we don't need anymore (and avoid error messages once
https://github.com/CenterForOpenScience/SHARE/pull/859 is merged)

contains #11012
---
 api_tests/share/test_share_preprint.py | 4 +---
 1 file changed, 1 insertion(+), 3
deletions(-)
diff --git a/api_tests/share/test_share_preprint.py b/api_tests/share/test_share_preprint.py
index 4ab47963bc8..cf0c8a3d92d 100644
--- a/api_tests/share/test_share_preprint.py
+++ b/api_tests/share/test_share_preprint.py
@@ -4,7 +4,7 @@
 import pytest
 import responses

-from api.share.utils import shtrove_ingest_url, sharev2_push_url
+from api.share.utils import shtrove_ingest_url
 from framework.auth.core import Auth
 from osf.models.spam import SpamStatus
 from osf.utils.permissions import READ, WRITE, ADMIN
@@ -124,14 +124,12 @@ def test_preprint_contributor_changes_updates_preprints_share(self, mock_share_r
     @pytest.mark.skip('Synchronous retries not supported if celery >=5.0')
     def test_call_async_update_on_500_failure(self, mock_share_responses, preprint, auth):
         mock_share_responses.replace(responses.POST, shtrove_ingest_url(), status=500)
-        mock_share_responses.replace(responses.POST, sharev2_push_url(), status=500)
         preprint.set_published(True, auth=auth, save=True)
         with expect_preprint_ingest_request(mock_share_responses, preprint, count=5):
             preprint.update_search()

     def test_no_call_async_update_on_400_failure(self, mock_share_responses, preprint, auth):
         mock_share_responses.replace(responses.POST, shtrove_ingest_url(), status=400)
-        mock_share_responses.replace(responses.POST, sharev2_push_url(), status=400)
         preprint.set_published(True, auth=auth, save=True)
         with expect_preprint_ingest_request(mock_share_responses, preprint, count=1, error_response=True):
             preprint.update_search()
From 6fbef50d189e81166705e89a6b079eb377119dde Mon Sep 17 00:00:00 2001
From: antkryt
Date: Thu, 24 Apr 2025 16:55:33 +0300
Subject: [PATCH 12/23] [ENG-7716] Allow for reinstatement of previous
 preprint versions (with date uploaded) via the admin app (#11097)

## Purpose
Add a "Create new version 1" button to the admin UI

## Changes
- Add button to admin UI
- Extend existing preprint versioning functionality
- Move permission validation logic from serializer to model level to keep everything in one place
- add a require_permission decorator (with ignore_permission support) that checks whether the user has sufficient permissions to call a method

## QA Notes
Expected behavior:
1. Admin goes to the preprint admin page
2. Clicks "Create new version 1"; a modal asking to choose a date is displayed
3. If the date is valid and the selected file versions exist:
- a new version 1 will be created
- the admin user will be redirected to v1
- optionally, the admin user can publish the new preprint by clicking the "Make published" button

(!) The preprint guid should point to the latest version, not the newly created one.

(!) An admin should be able to create new versions of a preprint even if they are not a contributor.
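For reviewers, a simplified sketch of the new flow (condensed from `PreprintReVersion.post` and `Preprint.create_version` in this PR; all names come from the diff below):

```python
# Re-versioning, roughly: bump every existing version up one slot,
# then rebuild version 1 from the latest version's metadata.
versions = preprint.get_preprint_versions()
for version in versions:
    version.upgrade_version()              # e.g. v1 -> v2, v2 -> v3, ...

new_preprint, data_to_update = Preprint.create_version(
    create_from_guid=preprint._id,         # base guid; always builds from the latest version
    assign_version_number=1,               # reclaim the freed slot
    auth=request,
    ignore_permission=True,                # admin app may act without being a contributor
)
```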
## Ticket
https://openscience.atlassian.net/browse/ENG-7716
---
 admin/preprints/urls.py                 |   1 +
 admin/preprints/views.py                |  50 +++++-
 admin/static/js/preprints/preprints.js  |  18 ++
 .../preprints/assign_new_version.html   |  32 ++++
 admin/templates/preprints/preprint.html |   9 +
 admin_tests/preprints/test_views.py     |  30 ++++
 api/base/utils.py                       |   4 +-
 api/preprints/serializers.py            | 104 ++++--------
 osf/models/mixins.py                    |  33 ++--
 osf/models/preprint.py                  | 160 ++++++++++++------
 website/files/utils.py                  |   6 +-
 11 files changed, 304 insertions(+), 143 deletions(-)
 create mode 100644 admin/static/js/preprints/preprints.js
 create mode 100644 admin/templates/preprints/assign_new_version.html
diff --git a/admin/preprints/urls.py b/admin/preprints/urls.py
index 2b0e573f953..4ab9bd33939 100644
--- a/admin/preprints/urls.py
+++ b/admin/preprints/urls.py
@@ -13,6 +13,7 @@
     re_path(r'^(?P<guid>\w+)/change_provider/$', views.PreprintProviderChangeView.as_view(), name='preprint-provider'),
     re_path(r'^(?P<guid>\w+)/machine_state/$', views.PreprintMachineStateView.as_view(), name='preprint-machine-state'),
     re_path(r'^(?P<guid>\w+)/reindex_share_preprint/$', views.PreprintReindexShare.as_view(), name='reindex-share-preprint'),
+    re_path(r'^(?P<guid>\w+)/reversion_preprint/$', views.PreprintReVersion.as_view(), name='re-version-preprint'),
     re_path(r'^(?P<guid>\w+)/remove_user/(?P<user_id>[a-z0-9]+)/$', views.PreprintRemoveContributorView.as_view(), name='remove-user'),
     re_path(r'^(?P<guid>\w+)/make_private/$', views.PreprintMakePrivate.as_view(), name='make-private'),
     re_path(r'^(?P<guid>\w+)/fix_editing/$', views.PreprintFixEditing.as_view(), name='fix-editing'),
diff --git a/admin/preprints/views.py b/admin/preprints/views.py
index b9b3fd3dd59..8219ed1f4dc 100644
--- a/admin/preprints/views.py
+++ b/admin/preprints/views.py
@@ -1,6 +1,7 @@
+from django.db import transaction
 from django.db.models import F
 from django.core.exceptions import PermissionDenied
-from django.urls import NoReverseMatch
+from django.http import HttpResponse, JsonResponse
 from django.contrib import messages
 from django.contrib.auth.mixins import PermissionRequiredMixin
 from django.shortcuts import redirect
@@ -10,7 +11,7 @@
     FormView,
 )
 from django.utils import timezone
-from django.urls import reverse_lazy
+from django.urls import NoReverseMatch, reverse_lazy

 from admin.base.views import GuidView
 from admin.base.forms import GuidForm
@@ -19,6 +20,7 @@

 from api.share.utils import update_share
 from api.providers.workflows import Workflows
+from api.preprints.serializers import PreprintSerializer

 from osf.exceptions import PreprintStateError

@@ -45,6 +47,7 @@
 )
 from osf.utils.workflows import DefaultStates
 from website import search
+from website.files.utils import copy_files
 from website.preprints.tasks import on_preprint_updated

@@ -56,8 +59,8 @@ def get_object(self):
         preprint.guid = preprint._id
         return preprint

-    def get_success_url(self):
-        return reverse_lazy('preprints:preprint', kwargs={'guid': self.kwargs['guid']})
+    def get_success_url(self, guid=None):
+        return reverse_lazy('preprints:preprint', kwargs={'guid': guid or self.kwargs['guid']})


 class PreprintView(PreprintMixin, GuidView):
@@ -183,6 +186,45 @@ def post(self, request, *args, **kwargs):
         return redirect(self.get_success_url())


+class PreprintReVersion(PreprintMixin, View):
+    """Allows an authorized user to create new version 1 of a preprint based on earlier
+    primary file version(s). All operations are executed within an atomic transaction.
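+    Existing versions are first renumbered one step up, so that version number 1 is free for the re-created preprint.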
+    If any step fails, the entire transaction will be rolled back and no version will be changed.
+    """
+    permission_required = 'osf.change_node'
+
+    @transaction.atomic
+    def post(self, request, *args, **kwargs):
+        preprint = self.get_object()
+
+        file_versions = request.POST.getlist('file_versions')
+        if not file_versions:
+            return HttpResponse('At least one file version should be attached.', status=400)
+
+        versions = preprint.get_preprint_versions()
+        for version in versions:
+            version.upgrade_version()
+
+        new_preprint, data_to_update = Preprint.create_version(
+            create_from_guid=preprint._id,
+            assign_version_number=1,
+            auth=request,
+            ignore_permission=True,
+        )
+        primary_file = copy_files(preprint.primary_file, target_node=new_preprint, identifier__in=file_versions)
+        data_to_update['primary_file'] = primary_file
+
+        # FIXME: currently it's not possible to ignore permissions when updating subjects
+        # via the serializer; remove this logic if deprecated
+        subjects = data_to_update.pop('subjects', None)
+        if subjects:
+            new_preprint.set_subjects([subjects], auth=request, ignore_permission=True)
+
+        PreprintSerializer(new_preprint, context={'request': request, 'ignore_permission': True}).update(new_preprint, data_to_update)
+
+        return JsonResponse({'redirect': self.get_success_url(new_preprint._id)})
+
+
 class PreprintReindexElastic(PreprintMixin, View):
     """ Allows an authorized user to reindex a node in ElasticSearch.
     """
diff --git a/admin/static/js/preprints/preprints.js b/admin/static/js/preprints/preprints.js
new file mode 100644
index 00000000000..21217725ba2
--- /dev/null
+++ b/admin/static/js/preprints/preprints.js
@@ -0,0 +1,18 @@
+$(document).ready(function() {
+
+    $("#confirmReversion").on("submit", function (event) {
+        event.preventDefault();
+
+        $.ajax({
+            url: window.templateVars.reVersionPreprint,
+            type: "post",
+            data: $("#re-version-preprint-form").serialize(),
+        }).success(function (response) {
+            if (response.redirect) {
+                window.location.href = response.redirect;
+            }
+        }).fail(function (jqXHR, textStatus, error) {
+            $("#version-validation").text(jqXHR.responseText);
+        });
+    });
+});
diff --git a/admin/templates/preprints/assign_new_version.html b/admin/templates/preprints/assign_new_version.html
new file mode 100644
index 00000000000..3ee5fcce6d5
--- /dev/null
+++ b/admin/templates/preprints/assign_new_version.html
@@ -0,0 +1,32 @@
+{% load node_extras %}
+
+    Create new version 1
+
+
diff --git a/admin/templates/preprints/preprint.html b/admin/templates/preprints/preprint.html
index b88333311ca..719304d716f 100644
--- a/admin/templates/preprints/preprint.html
+++ b/admin/templates/preprints/preprint.html
@@ -27,6 +27,7 @@
     {% include "preprints/make_public.html" with preprint=preprint %}
     {% include "preprints/make_published.html" with preprint=preprint %}
     {% include "preprints/fix_editing.html" with preprint=preprint %}
+    {% include "preprints/assign_new_version.html" with preprint=preprint %}
@@ -123,3 +124,11 @@

Preprint: {{ preprint.title }} {% endblock content %} +{% block bottom_js %} + + +{% endblock %} diff --git a/admin_tests/preprints/test_views.py b/admin_tests/preprints/test_views.py index 1fb9d68482d..06ec5fa8d04 100644 --- a/admin_tests/preprints/test_views.py +++ b/admin_tests/preprints/test_views.py @@ -797,3 +797,33 @@ def test_admin_user_can_publish_preprint(self, user, preprint, plain_view): preprint.reload() assert preprint.is_published + + +@pytest.mark.urls('admin.base.urls') +class TestPreprintReVersionView: + + @pytest.fixture() + def plain_view(self): + return views.PreprintReVersion + + def test_admin_user_can_add_new_version_one(self, user, preprint, plain_view): + # user isn't admin contributor in the preprint + assert preprint.contributors.filter(id=user.id).exists() is False + assert preprint.has_permission(user, ADMIN) is False + assert len(preprint.get_preprint_versions()) == 1 + + request = RequestFactory().post( + reverse('preprints:re-version-preprint', + kwargs={'guid': preprint._id}), + data={'file_versions': ['1']} + ) + request.user = user + + admin_group = Group.objects.get(name='osf_admin') + admin_group.permissions.add(Permission.objects.get(codename='change_node')) + user.groups.add(admin_group) + + plain_view.as_view()(request, guid=preprint._id) + preprint.refresh_from_db() + + assert len(preprint.get_preprint_versions()) == 2 diff --git a/api/base/utils.py b/api/base/utils.py index 0e3690e6dbd..c08c8c73977 100644 --- a/api/base/utils.py +++ b/api/base/utils.py @@ -69,7 +69,9 @@ def get_user_auth(request): authenticated user attached to it. """ user = request.user - private_key = request.query_params.get('view_only', None) + private_key = None + if hasattr(request, 'query_params'): # allows django WSGIRequest to be used as well + private_key = request.query_params.get('view_only', None) if user.is_anonymous: auth = Auth(None, private_key=private_key) else: diff --git a/api/preprints/serializers.py b/api/preprints/serializers.py index 2e802a438a0..819c17ab58b 100644 --- a/api/preprints/serializers.py +++ b/api/preprints/serializers.py @@ -296,7 +296,8 @@ def update(self, preprint, validated_data): assert isinstance(preprint, Preprint), 'You must specify a valid preprint to be updated' auth = get_user_auth(self.context['request']) - if not preprint.has_permission(auth.user, osf_permissions.WRITE): + ignore_permission = self.context.get('ignore_permission', False) + if not ignore_permission and not preprint.has_permission(auth.user, osf_permissions.WRITE): raise exceptions.PermissionDenied(detail='User must have admin or write permissions to update a preprint.') for field in ['conflict_of_interest_statement', 'why_no_data', 'why_no_prereg']: @@ -344,76 +345,45 @@ def update(self, preprint, validated_data): detail='You cannot edit this field while your prereg links availability is set to false or is unanswered.', ) - def require_admin_permission(): - if not preprint.has_permission(auth.user, osf_permissions.ADMIN): - raise exceptions.PermissionDenied(detail='Must have admin permissions to update author assertion fields.') + try: + if 'has_coi' in validated_data: + preprint.update_has_coi(auth, validated_data['has_coi'], ignore_permission=ignore_permission) - if 'has_coi' in validated_data: - require_admin_permission() - try: - preprint.update_has_coi(auth, validated_data['has_coi']) - except PreprintStateError as e: - raise exceptions.ValidationError(detail=str(e)) + if 'conflict_of_interest_statement' in validated_data: + 
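+                # the update_* helpers now enforce permissions themselves (via require_permission); PermissionsError is translated to PermissionDenied below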
preprint.update_conflict_of_interest_statement(auth, validated_data['conflict_of_interest_statement'], ignore_permission=ignore_permission) - if 'conflict_of_interest_statement' in validated_data: - require_admin_permission() - try: - preprint.update_conflict_of_interest_statement(auth, validated_data['conflict_of_interest_statement']) - except PreprintStateError as e: - raise exceptions.ValidationError(detail=str(e)) + if 'has_data_links' in validated_data: + preprint.update_has_data_links(auth, validated_data['has_data_links'], ignore_permission=ignore_permission) - if 'has_data_links' in validated_data: - require_admin_permission() - try: - preprint.update_has_data_links(auth, validated_data['has_data_links']) - except PreprintStateError as e: - raise exceptions.ValidationError(detail=str(e)) + if 'why_no_data' in validated_data: + preprint.update_why_no_data(auth, validated_data['why_no_data'], ignore_permission=ignore_permission) - if 'why_no_data' in validated_data: - require_admin_permission() - try: - preprint.update_why_no_data(auth, validated_data['why_no_data']) - except PreprintStateError as e: - raise exceptions.ValidationError(detail=str(e)) + if 'has_prereg_links' in validated_data: + preprint.update_has_prereg_links(auth, validated_data['has_prereg_links'], ignore_permission=ignore_permission) - if 'data_links' in validated_data: - require_admin_permission() - try: - preprint.update_data_links(auth, validated_data['data_links']) - except PreprintStateError as e: - raise exceptions.ValidationError(detail=str(e)) - else: - if updated_has_data_links == 'no' and preprint.data_links: - preprint.update_data_links(auth, []) - - if 'has_prereg_links' in validated_data: - require_admin_permission() + if 'why_no_prereg' in validated_data: + preprint.update_why_no_prereg(auth, validated_data['why_no_prereg'], ignore_permission=ignore_permission) - try: - preprint.update_has_prereg_links(auth, validated_data['has_prereg_links']) - except PreprintStateError as e: - raise exceptions.ValidationError(detail=str(e)) - - if 'why_no_prereg' in validated_data: - require_admin_permission() - try: - preprint.update_why_no_prereg(auth, validated_data['why_no_prereg']) - except PreprintStateError as e: - raise exceptions.ValidationError(detail=str(e)) + if 'prereg_links' in validated_data: + preprint.update_prereg_links(auth, validated_data['prereg_links'], ignore_permission=ignore_permission) - if 'prereg_links' in validated_data: - require_admin_permission() - try: - preprint.update_prereg_links(auth, validated_data['prereg_links']) - except PreprintStateError as e: - raise exceptions.ValidationError(detail=str(e)) + if 'prereg_link_info' in validated_data: + preprint.update_prereg_link_info(auth, validated_data['prereg_link_info'], ignore_permission=ignore_permission) + except PreprintStateError as e: + raise exceptions.ValidationError(detail=str(e)) + except PermissionsError: + raise exceptions.PermissionDenied(detail='Must have admin permissions to update author assertion fields.') - if 'prereg_link_info' in validated_data: - require_admin_permission() + if 'data_links' in validated_data: try: - preprint.update_prereg_link_info(auth, validated_data['prereg_link_info']) + preprint.update_data_links(auth, validated_data['data_links'], ignore_permission=ignore_permission) except PreprintStateError as e: raise exceptions.ValidationError(detail=str(e)) + except PermissionsError: + raise exceptions.PermissionDenied(detail='Must have admin permissions to update author assertion fields.') + else: + 
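+                # no data_links supplied in the request: clear any stale links when availability is 'no'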
if updated_has_data_links == 'no' and preprint.data_links: + preprint.update_data_links(auth, [], ignore_permission=ignore_permission) published = validated_data.pop('is_published', None) if published and preprint.provider.is_reviewed: @@ -434,7 +404,7 @@ def require_admin_permission(): primary_file = validated_data.pop('primary_file', None) if primary_file: - self.set_field(preprint.set_primary_file, primary_file, auth) + self.set_field(preprint.set_primary_file, primary_file, auth, ignore_permission=ignore_permission) old_tags = set(preprint.tags.values_list('name', flat=True)) if 'tags' in validated_data: @@ -451,7 +421,7 @@ def require_admin_permission(): if 'node' in validated_data: node = validated_data.pop('node', None) - self.set_field(preprint.set_supplemental_node, node, auth) + self.set_field(preprint.set_supplemental_node, node, auth, ignore_permission=ignore_permission) if 'subjects' in validated_data: subjects = validated_data.pop('subjects', None) @@ -459,11 +429,11 @@ def require_admin_permission(): if 'title' in validated_data: title = validated_data['title'] - self.set_field(preprint.set_title, title, auth) + self.set_field(preprint.set_title, title, auth, ignore_permission=ignore_permission) if 'description' in validated_data: description = validated_data['description'] - self.set_field(preprint.set_description, description, auth) + self.set_field(preprint.set_description, description, auth, ignore_permission=ignore_permission) if 'article_doi' in validated_data: preprint.article_doi = validated_data['article_doi'] @@ -483,7 +453,7 @@ def require_admin_permission(): raise exceptions.ValidationError( detail='A valid primary_file must be set before publishing a preprint.', ) - self.set_field(preprint.set_published, published, auth) + self.set_field(preprint.set_published, published, auth, ignore_permission=ignore_permission) recently_published = published preprint.set_privacy('public', log=False, save=True) @@ -506,9 +476,9 @@ def require_admin_permission(): return preprint - def set_field(self, func, val, auth, save=False): + def set_field(self, func, val, auth, **kwargs): try: - func(val, auth) + func(val, auth, **kwargs) except PermissionsError as e: raise exceptions.PermissionDenied(detail=str(e)) except (ValueError, ValidationError, NodeStateError) as e: diff --git a/osf/models/mixins.py b/osf/models/mixins.py index 9027a284f7c..6f1c7f36f6f 100644 --- a/osf/models/mixins.py +++ b/osf/models/mixins.py @@ -1116,25 +1116,26 @@ def subjects_relationship_url(self): def subjects_url(self): return self.absolute_api_v2_url + 'subjects/' - def check_subject_perms(self, auth): + def check_subject_perms(self, auth, ignore_permission=False): AbstractNode = apps.get_model('osf.AbstractNode') Preprint = apps.get_model('osf.Preprint') CollectionSubmission = apps.get_model('osf.CollectionSubmission') DraftRegistration = apps.get_model('osf.DraftRegistration') - if isinstance(self, AbstractNode): - if not self.has_permission(auth.user, ADMIN): - raise PermissionsError('Only admins can change subjects.') - elif isinstance(self, Preprint): - if not self.has_permission(auth.user, WRITE): - raise PermissionsError('Must have admin or write permissions to change a preprint\'s subjects.') - elif isinstance(self, DraftRegistration): - if not self.has_permission(auth.user, WRITE): - raise PermissionsError('Must have write permissions to change a draft registration\'s subjects.') - elif isinstance(self, CollectionSubmission): - if not self.guid.referent.has_permission(auth.user, ADMIN) and not 
auth.user.has_perms( - self.collection.groups[ADMIN], self.collection): - raise PermissionsError('Only admins can change subjects.') + if not ignore_permission: + if isinstance(self, AbstractNode): + if not self.has_permission(auth.user, ADMIN): + raise PermissionsError('Only admins can change subjects.') + elif isinstance(self, Preprint): + if not self.has_permission(auth.user, WRITE): + raise PermissionsError('Must have admin or write permissions to change a preprint\'s subjects.') + elif isinstance(self, DraftRegistration): + if not self.has_permission(auth.user, WRITE): + raise PermissionsError('Must have write permissions to change a draft registration\'s subjects.') + elif isinstance(self, CollectionSubmission): + if not self.guid.referent.has_permission(auth.user, ADMIN) and not auth.user.has_perms( + self.collection.groups[ADMIN], self.collection): + raise PermissionsError('Only admins can change subjects.') return def add_subjects_log(self, old_subjects, auth): @@ -1157,7 +1158,7 @@ def assert_subject_format(self, subj_list, expect_list, error_msg): if (expect_list and not is_list) or (not expect_list and is_list): raise ValidationValueError(f'Subjects are improperly formatted. {error_msg}') - def set_subjects(self, new_subjects, auth, add_log=True): + def set_subjects(self, new_subjects, auth, add_log=True, **kwargs): """ Helper for setting M2M subjects field from list of hierarchies received from UI. Only authorized admins may set subjects. @@ -1168,7 +1169,7 @@ def set_subjects(self, new_subjects, auth, add_log=True): :return: None """ if auth: - self.check_subject_perms(auth) + self.check_subject_perms(auth, **kwargs) self.assert_subject_format(new_subjects, expect_list=True, error_msg='Expecting list of lists.') old_subjects = list(self.subjects.values_list('id', flat=True)) diff --git a/osf/models/preprint.py b/osf/models/preprint.py index 30fdb19df04..992c93f823e 100644 --- a/osf/models/preprint.py +++ b/osf/models/preprint.py @@ -1,11 +1,12 @@ import functools +import inspect from urllib.parse import urljoin import logging import re from dirtyfields import DirtyFieldsMixin from django.db import models, IntegrityError -from django.db.models import Q +from django.db.models import Q, Max from django.utils import timezone from django.contrib.contenttypes.fields import GenericRelation from django.core.exceptions import ValidationError @@ -146,6 +147,35 @@ class EverPublishedPreprintManager(PreprintManager): def get_queryset(self): return super().get_queryset().filter(date_published__isnull=False) + +def require_permission(permissions: list): + """ + Preprint-specific decorator for permission checks. + + This decorator adds an implicit `ignore_permission` argument to the decorated function, + allowing you to bypass the permission check when set to `True`. 
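+    The permission check only runs when the wrapped method is called with an auth argument; auth=None skips the check.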
+ + Usage example: + preprint.some_method(..., ignore_permission=True) # Skips permission check + """ + def decorator(func): + @functools.wraps(func) + def wrapper(self, *args, ignore_permission=False, **kwargs): + sig = inspect.signature(func) + bound_args = sig.bind_partial(self, *args, **kwargs) + bound_args.apply_defaults() + + auth = bound_args.arguments.get('auth', None) + + if not ignore_permission and auth is not None: + for permission in permissions: + if not self.has_permission(auth.user, permission): + raise PermissionsError(f'Must have following permissions to change a preprint: {permissions}') + return func(self, *args, ignore_permission=ignore_permission, **kwargs) + return wrapper + return decorator + + class Preprint(DirtyFieldsMixin, VersionedGuidMixin, IdentifierMixin, ReviewableMixin, BaseModel, TitleMixin, DescriptionMixin, Loggable, Taggable, ContributorMixin, GuardianMixin, SpamOverrideMixin, TaxonomizableMixin, AffiliatedInstitutionMixin): @@ -374,12 +404,14 @@ def check_unfinished_or_unpublished_version(self): return None, None @classmethod - def create_version(cls, create_from_guid, auth): + def create_version(cls, create_from_guid, auth, assign_version_number=None, ignore_permission=False): """Create a new version for a given preprint. `create_from_guid` can be any existing versions of the preprint but `create_version` always finds the latest version and creates a new version from it. In addition, this creates an "incomplete" new preprint version object using the model class and returns both the new object and the data to be updated. The API, more specifically `PreprintCreateVersionSerializer` must call `.update()` to "completely finish" the new preprint version object creation. + Optionally, you can assign a custom version number, as long as it doesn't conflict with existing versions. + The version must be an integer greater than 0. """ # Use `Guid.load()` instead of `VersionedGuid.load()` to retrieve the base guid obj, which always points to the @@ -389,7 +421,7 @@ def create_version(cls, create_from_guid, auth): if not latest_version: sentry.log_message(f'Preprint not found: [guid={guid_obj._id}, create_from_guid={create_from_guid}]') return None, None - if not latest_version.has_permission(auth.user, ADMIN): + if not ignore_permission and not latest_version.has_permission(auth.user, ADMIN): sentry.log_message(f'ADMIN permission for the latest version is required to create a new version: ' f'[user={auth.user._id}, guid={guid_obj._id}, latest_version={latest_version._id}]') raise PermissionsError @@ -407,9 +439,6 @@ def create_version(cls, create_from_guid, auth): f'state={unfinished_version.machine_state}].') return unfinished_version, None - # Note: version number bumps from the last version number instead of the latest version number - last_version_number = guid_obj.versions.order_by('-version').first().version - # Prepare the data to clone/update data_to_update = { 'subjects': [el for el in latest_version.subjects.all().values_list('_id', flat=True)], @@ -438,13 +467,30 @@ def create_version(cls, create_from_guid, auth): description=latest_version.description, ) preprint.save(guid_ready=False) + + # Note: version number bumps from the last version number instead of the latest version number + # if assign_version_number is not specified + if assign_version_number: + if not isinstance(assign_version_number, int) or assign_version_number <= 0: + raise ValueError( + f"Unable to assign: {assign_version_number}. " + 'Version must be integer greater than 0.' 
+ ) + if GuidVersionsThrough.objects.filter(guid=guid_obj, version=assign_version_number).first(): + raise ValueError(f"Version {assign_version_number} for preprint {guid_obj} already exists.") + + version_number = assign_version_number + else: + last_version_number = guid_obj.versions.order_by('-version').first().version + version_number = last_version_number + 1 + # Create a new entry in the `GuidVersionsThrough` table to store version information, which must happen right # after the first `.save()` of the new preprint version object, which enables `preprint._id` to be computed. guid_version = GuidVersionsThrough( referent=preprint, object_id=guid_obj.object_id, content_type=guid_obj.content_type, - version=last_version_number + 1, + version=version_number, guid=guid_obj ) guid_version.save() @@ -482,8 +528,8 @@ def create_version(cls, create_from_guid, auth): for institution in latest_version.affiliated_institutions.all(): preprint.add_affiliated_institution(institution, auth.user, ignore_user_affiliation=True) - # Update Guid obj to point to the new version if there is no moderation - if not preprint.provider.reviews_workflow: + # Update Guid obj to point to the new version if there is no moderation and new version is bigger + if not preprint.provider.reviews_workflow and version_number > guid_obj.referent.version: guid_obj.referent = preprint guid_obj.object_id = preprint.pk guid_obj.content_type = ContentType.objects.get_for_model(preprint) @@ -494,6 +540,14 @@ def create_version(cls, create_from_guid, auth): return preprint, data_to_update + def upgrade_version(self): + """Increase preprint version by one.""" + guid_version = GuidVersionsThrough.objects.get(object_id=self.id) + guid_version.version += 1 + guid_version.save() + + return self + @property def is_deleted(self): return bool(self.deleted) @@ -707,9 +761,14 @@ def is_latest_version(self): def get_preprint_versions(self, include_rejected=True): guids = self.versioned_guids.first().guid.versions.all() - preprint_versions = Preprint.objects.filter(id__in=[vg.object_id for vg in guids]).order_by('-id') + preprint_versions = ( + Preprint.objects + .filter(id__in=[vg.object_id for vg in guids]) + .annotate(latest_version=Max('versioned_guids__version')) + .order_by('-latest_version') + ) if include_rejected is False: - preprint_versions = [p for p in preprint_versions if p.machine_state != 'rejected'] + preprint_versions = preprint_versions.exclude(machine_state=DefaultStates.REJECTED.value) return preprint_versions def web_url_for(self, view_name, _absolute=False, _guid=False, *args, **kwargs): @@ -768,13 +827,11 @@ def add_subjects_log(self, old_subjects, auth): ) return - def set_primary_file(self, preprint_file, auth, save=False): + @require_permission([WRITE]) + def set_primary_file(self, preprint_file, auth, save=False, **kwargs): if not self.root_folder: raise PreprintStateError('Preprint needs a root folder.') - if not self.has_permission(auth.user, WRITE): - raise PermissionsError('Must have admin or write permissions to change a preprint\'s primary file.') - if preprint_file.target != self or preprint_file.provider != 'osfstorage': raise ValueError('This file is not a valid primary file for this preprint.') @@ -800,10 +857,8 @@ def set_primary_file(self, preprint_file, auth, save=False): self.save() update_or_enqueue_on_preprint_updated(preprint_id=self._id, saved_fields=['primary_file']) - def set_published(self, published, auth, save=False, ignore_permission=False): - if not ignore_permission and not 
self.has_permission(auth.user, ADMIN): - raise PermissionsError('Only admins can publish a preprint.') - + @require_permission([ADMIN]) + def set_published(self, published, auth, save=False, **kwargs): if self.is_published and not published: raise ValueError('Cannot unpublish preprint.') @@ -820,7 +875,7 @@ def set_published(self, published, auth, save=False, ignore_permission=False): raise ValueError('Preprint must have at least one subject to be published.') self.date_published = timezone.now() # For legacy preprints, not logging - self.set_privacy('public', log=False, save=False) + self.set_privacy('public', log=False, save=False, **kwargs) # In case this provider is ever set up to use a reviews workflow, put this preprint in a sensible state self.machine_state = ReviewStates.ACCEPTED.value @@ -1042,10 +1097,8 @@ def remove_tag(self, tag, auth, save=True): update_or_enqueue_on_preprint_updated(preprint_id=self._id, saved_fields=['tags']) return True - def set_supplemental_node(self, node, auth, save=False, ignore_node_permissions=False): - if not self.has_permission(auth.user, WRITE): - raise PermissionsError('You must have write permissions to set a supplemental node.') - + @require_permission([WRITE]) + def set_supplemental_node(self, node, auth, save=False, ignore_node_permissions=False, **kwargs): if not node.has_permission(auth.user, WRITE) and not ignore_node_permissions: raise PermissionsError('You must have write permissions on the supplemental node to attach.') @@ -1067,10 +1120,8 @@ def set_supplemental_node(self, node, auth, save=False, ignore_node_permissions= if save: self.save() - def unset_supplemental_node(self, auth, save=False): - if not self.has_permission(auth.user, WRITE): - raise PermissionsError('You must have write permissions to set a supplemental node.') - + @require_permission([WRITE]) + def unset_supplemental_node(self, auth, save=False, **kwargs): current_node_id = self.node._id if self.node else None self.node = None @@ -1087,27 +1138,23 @@ def unset_supplemental_node(self, auth, save=False): if save: self.save() - def set_title(self, title, auth, save=False): + @require_permission([WRITE]) + def set_title(self, title, auth, save=False, **kwargs): """Set the title of this Preprint and log it. :param str title: The new title. :param auth: All the auth information including user, API key. """ - if not self.has_permission(auth.user, WRITE): - raise PermissionsError('Must have admin or write permissions to edit a preprint\'s title.') - return super().set_title(title, auth, save) - def set_description(self, description, auth, save=False): + @require_permission([WRITE]) + def set_description(self, description, auth, save=False, **kwargs): """Set the description and log the event. :param str description: The new description :param auth: All the auth informtion including user, API key. :param bool save: Save self after updating. 
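+        :param kwargs: Absorbs ignore_permission, which is forwarded by the require_permission decorator.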
""" - if not self.has_permission(auth.user, WRITE): - raise PermissionsError('Must have admin or write permissions to edit a preprint\'s title.') - return super().set_description(description, auth, save) def get_spam_fields(self, saved_fields=None): @@ -1115,7 +1162,8 @@ def get_spam_fields(self, saved_fields=None): return self.SPAM_CHECK_FIELDS return self.SPAM_CHECK_FIELDS.intersection(saved_fields) - def set_privacy(self, permissions, auth=None, log=True, save=True, check_addons=False, force=False, should_hide=False): + @require_permission([WRITE]) + def set_privacy(self, permissions, auth=None, log=True, save=True, check_addons=False, force=False, should_hide=False, **kwargs): """Set the permissions for this preprint - mainly for spam purposes. :param permissions: A string, either 'public' or 'private' @@ -1124,8 +1172,6 @@ def set_privacy(self, permissions, auth=None, log=True, save=True, check_addons= :param bool meeting_creation: Whether this was created due to a meetings email. :param bool check_addons: Check and collect messages for addons? """ - if auth and not self.has_permission(auth.user, WRITE): - raise PermissionsError('Must have admin or write permissions to change privacy settings.') if permissions == 'public' and not self.is_public: if (self.is_spam or (settings.SPAM_FLAGGED_MAKE_NODE_PRIVATE and self.is_spammy)) and not force: raise PreprintStateError( @@ -1191,7 +1237,8 @@ def set_contributor_order(self, contributor_ids): @classmethod def bulk_update_search(cls, preprints, index=None): for _preprint in preprints: - update_share(_preprint) + if _preprint.is_latest_version: + update_share(_preprint) from website import search try: serialize = functools.partial(search.search.update_preprint, index=index, bulk=True, async_update=False) @@ -1269,7 +1316,8 @@ def _add_related_source_tags(self, contributor): system_tag_to_add, created = Tag.all_tags.get_or_create(name=provider_source_tag(self.provider._id, 'preprint'), system=True) contributor.add_system_tag(system_tag_to_add) - def update_has_coi(self, auth: Auth, has_coi: bool, log: bool = True, save: bool = True): + @require_permission([ADMIN]) + def update_has_coi(self, auth: Auth, has_coi: bool, log: bool = True, save: bool = True, **kwargs): """ This method sets the field `has_coi` to indicate if there's a conflict interest statement for this preprint and logs that change. @@ -1301,7 +1349,8 @@ def update_has_coi(self, auth: Auth, has_coi: bool, log: bool = True, save: bool if save: self.save() - def update_conflict_of_interest_statement(self, auth: Auth, coi_statement: str, log: bool = True, save: bool = True): + @require_permission([ADMIN]) + def update_conflict_of_interest_statement(self, auth: Auth, coi_statement: str, log: bool = True, save: bool = True, **kwargs): """ This method sets the `conflict_of_interest_statement` field for this preprint and logs that change. @@ -1330,7 +1379,8 @@ def update_conflict_of_interest_statement(self, auth: Auth, coi_statement: str, if save: self.save() - def update_has_data_links(self, auth: Auth, has_data_links: bool, log: bool = True, save: bool = True): + @require_permission([ADMIN]) + def update_has_data_links(self, auth: Auth, has_data_links: bool, log: bool = True, save: bool = True, **kwargs): """ This method sets the `has_data_links` field that respresent the availability of links to supplementary data for this preprint and logs that change. 
@@ -1361,11 +1411,12 @@ def update_has_data_links(self, auth: Auth, has_data_links: bool, log: bool = Tr
             auth=auth
         )
         if not has_data_links:
-            self.update_data_links(auth, data_links=[], log=False)
+            self.update_data_links(auth, data_links=[], log=False, **kwargs)
         if save:
             self.save()

-    def update_data_links(self, auth: Auth, data_links: list, log: bool = True, save: bool = True):
+    @require_permission([ADMIN])
+    def update_data_links(self, auth: Auth, data_links: list, log: bool = True, save: bool = True, **kwargs):
         """
         This method sets the field `data_links`, which is a validated list of links to
         supplementary data for a preprint, and logs that change.
@@ -1397,7 +1448,8 @@ def update_data_links(self, auth: Auth, data_links: list, log: bool = True, save
         if save:
             self.save()

-    def update_why_no_data(self, auth: Auth, why_no_data: str, log: bool = True, save: bool = True):
+    @require_permission([ADMIN])
+    def update_why_no_data(self, auth: Auth, why_no_data: str, log: bool = True, save: bool = True, **kwargs):
         """
         This method sets the field `why_no_data`, a string that represents a user-provided
         explanation for the unavailability of supplementary data for their preprint.
@@ -1429,7 +1481,8 @@ def update_why_no_data(self, auth: Auth, why_no_data: str, log: bool = True, sav
         if save:
             self.save()

-    def update_has_prereg_links(self, auth: Auth, has_prereg_links: bool, log: bool = True, save: bool = True):
+    @require_permission([ADMIN])
+    def update_has_prereg_links(self, auth: Auth, has_prereg_links: bool, log: bool = True, save: bool = True, **kwargs):
         """
         This method updates the `has_prereg_links` field that indicates the availability
         of links to prereg data and logs changes to it.
@@ -1461,12 +1514,13 @@ def update_has_prereg_links(self, auth: Auth, has_prereg_links: bool, log: bool
             auth=auth
         )
         if not has_prereg_links:
-            self.update_prereg_links(auth, prereg_links=[], log=False)
-            self.update_prereg_link_info(auth, prereg_link_info=None, log=False)
+            self.update_prereg_links(auth, prereg_links=[], log=False, **kwargs)
+            self.update_prereg_link_info(auth, prereg_link_info=None, log=False, **kwargs)
         if save:
             self.save()

-    def update_why_no_prereg(self, auth: Auth, why_no_prereg: str, log: bool = True, save: bool = True):
+    @require_permission([ADMIN])
+    def update_why_no_prereg(self, auth: Auth, why_no_prereg: str, log: bool = True, save: bool = True, **kwargs):
         """
         This method updates the field `why_no_prereg` that contains a user-provided
         explanation of prereg data unavailability and logs changes to it.
@@ -1498,7 +1552,8 @@ def update_why_no_prereg(self, auth: Auth, why_no_prereg: str, log: bool = True,
         if save:
             self.save()

-    def update_prereg_links(self, auth: Auth, prereg_links: list, log: bool = True, save: bool = True):
+    @require_permission([ADMIN])
+    def update_prereg_links(self, auth: Auth, prereg_links: list, log: bool = True, save: bool = True, **kwargs):
         """
         This method updates the field `prereg_links` that contains a list of validated
         URLs linking to prereg data and logs changes to it.
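# A hedged, runnable sketch (simplified signatures, names from the hunks above)
# of why these setters now accept **kwargs: an ignore_permission flag passed at
# the top-level call must survive into the cascaded update_* calls, so each
# method forwards its **kwargs onward.
class _KwargsThreadingSketch:
    def update_has_prereg_links(self, auth, has_prereg_links, log=True, save=True, **kwargs):
        if not has_prereg_links:
            # forwarding **kwargs keeps ignore_permission in effect downstream
            self.update_prereg_links(auth, prereg_links=[], log=False, **kwargs)

    def update_prereg_links(self, auth, prereg_links, log=True, save=True, **kwargs):
        print('ignore_permission =', kwargs.get('ignore_permission', False))

_KwargsThreadingSketch().update_has_prereg_links(None, False, ignore_permission=True)
# prints: ignore_permission = True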
@@ -1530,7 +1585,8 @@ def update_prereg_links(self, auth: Auth, prereg_links: list, log: bool = True, if save: self.save() - def update_prereg_link_info(self, auth: Auth, prereg_link_info: str, log: bool = True, save: bool = True): + @require_permission([ADMIN]) + def update_prereg_link_info(self, auth: Auth, prereg_link_info: str, log: bool = True, save: bool = True, **kwargs): """ This method updates the field `prereg_link_info` that contains a one of a finite number of choice strings in contained in the list in the static member `PREREG_LINK_INFO_CHOICES` that describe the nature of the preprint's diff --git a/website/files/utils.py b/website/files/utils.py index 6121c4fb757..50c25cefd13 100644 --- a/website/files/utils.py +++ b/website/files/utils.py @@ -1,7 +1,7 @@ from osf.models.metadata import GuidMetadataRecord -def copy_files(src, target_node, parent=None, name=None): +def copy_files(src, target_node, parent=None, name=None, **version_filters): """Copy the files from src to the target node :param Folder src: The source to copy children from :param Node target_node: The node to copy files to @@ -18,7 +18,7 @@ def copy_files(src, target_node, parent=None, name=None): cloned.save() if src.is_file and src.versions.exists(): - fileversions = src.versions.select_related('region').order_by('-created') + fileversions = src.versions.filter(**version_filters).select_related('region').order_by('-created') most_recent_fileversion = fileversions.first() if most_recent_fileversion.region and most_recent_fileversion.region != target_node.osfstorage_region: # add all original version except the most recent @@ -29,7 +29,7 @@ def copy_files(src, target_node, parent=None, name=None): new_fileversion.save() attach_versions(cloned, [new_fileversion], src) else: - attach_versions(cloned, src.versions.all(), src) + attach_versions(cloned, fileversions, src) if renaming: latest_version = cloned.versions.first() From 36887123687a8da82b7497fbcdef9f93ad4ee8b0 Mon Sep 17 00:00:00 2001 From: antkryt Date: Thu, 24 Apr 2025 20:52:54 +0300 Subject: [PATCH 13/23] fix feature for non-contributor admin (#11111) ## Purpose fix for non-contributor admin ## Changes - add ignore_permission to set_license - prevent creating new preprint version when serializer raises error https://openscience.atlassian.net/browse/ENG-7716 --- admin/preprints/views.py | 46 +++++++++++++++------------- api/preprints/serializers.py | 21 +++++-------- osf/models/preprint.py | 5 +-- website/project/licenses/__init__.py | 4 +-- 4 files changed, 38 insertions(+), 38 deletions(-) diff --git a/admin/preprints/views.py b/admin/preprints/views.py index 8219ed1f4dc..36fdb877029 100644 --- a/admin/preprints/views.py +++ b/admin/preprints/views.py @@ -23,6 +23,7 @@ from api.preprints.serializers import PreprintSerializer from osf.exceptions import PreprintStateError +from rest_framework.exceptions import PermissionDenied as DrfPermissionDenied from osf.management.commands.fix_preprints_has_data_links_and_why_no_data import process_wrong_why_not_data_preprints from osf.models import ( @@ -193,7 +194,6 @@ class PreprintReVersion(PreprintMixin, View): """ permission_required = 'osf.change_node' - @transaction.atomic def post(self, request, *args, **kwargs): preprint = self.get_object() @@ -201,26 +201,30 @@ def post(self, request, *args, **kwargs): if not file_versions: return HttpResponse('At least one file version should be attached.', status=400) - versions = preprint.get_preprint_versions() - for version in versions: - version.upgrade_version() - - 
new_preprint, data_to_update = Preprint.create_version( - create_from_guid=preprint._id, - assign_version_number=1, - auth=request, - ignore_permission=True, - ) - primary_file = copy_files(preprint.primary_file, target_node=new_preprint, identifier__in=file_versions) - data_to_update['primary_file'] = primary_file - - # FIXME: currently it's not possible to ignore permission when update subjects - # via serializer, remove this logic if deprecated - subjects = data_to_update.pop('subjects', None) - if subjects: - new_preprint.set_subjects([subjects], auth=request, ignore_permission=True) - - PreprintSerializer(new_preprint, context={'request': request, 'ignore_permission': True}).update(new_preprint, data_to_update) + try: + with transaction.atomic(): + versions = preprint.get_preprint_versions() + for version in versions: + version.upgrade_version() + + new_preprint, data_to_update = Preprint.create_version( + create_from_guid=preprint._id, + assign_version_number=1, + auth=request, + ignore_permission=True, + ) + primary_file = copy_files(preprint.primary_file, target_node=new_preprint, identifier__in=file_versions) + data_to_update['primary_file'] = primary_file + + # FIXME: currently it's not possible to ignore permission when update subjects + # via serializer, remove this logic if deprecated + subjects = data_to_update.pop('subjects', None) + if subjects: + new_preprint.set_subjects([subjects], auth=request, ignore_permission=True) + + PreprintSerializer(new_preprint, context={'request': request, 'ignore_permission': True}).update(new_preprint, data_to_update) + except DrfPermissionDenied as exc: + return HttpResponse(f'Not enough permissions to perform this action : {str(exc)}', status=400) return JsonResponse({'redirect': self.get_success_url(new_preprint._id)}) diff --git a/api/preprints/serializers.py b/api/preprints/serializers.py index 819c17ab58b..117547230e4 100644 --- a/api/preprints/serializers.py +++ b/api/preprints/serializers.py @@ -369,22 +369,17 @@ def update(self, preprint, validated_data): if 'prereg_link_info' in validated_data: preprint.update_prereg_link_info(auth, validated_data['prereg_link_info'], ignore_permission=ignore_permission) + + if 'data_links' in validated_data: + preprint.update_data_links(auth, validated_data['data_links'], ignore_permission=ignore_permission) + else: + if updated_has_data_links == 'no' and preprint.data_links: + preprint.update_data_links(auth, [], ignore_permission=ignore_permission) except PreprintStateError as e: raise exceptions.ValidationError(detail=str(e)) except PermissionsError: raise exceptions.PermissionDenied(detail='Must have admin permissions to update author assertion fields.') - if 'data_links' in validated_data: - try: - preprint.update_data_links(auth, validated_data['data_links'], ignore_permission=ignore_permission) - except PreprintStateError as e: - raise exceptions.ValidationError(detail=str(e)) - except PermissionsError: - raise exceptions.PermissionDenied(detail='Must have admin permissions to update author assertion fields.') - else: - if updated_has_data_links == 'no' and preprint.data_links: - preprint.update_data_links(auth, [], ignore_permission=ignore_permission) - published = validated_data.pop('is_published', None) if published and preprint.provider.is_reviewed: url = absolute_reverse( @@ -440,7 +435,7 @@ def update(self, preprint, validated_data): if 'license_type' in validated_data or 'license' in validated_data: license_details = get_license_details(preprint, validated_data) - 
self.set_field(preprint.set_preprint_license, license_details, auth) + self.set_field(preprint.set_preprint_license, license_details, auth, ignore_permission=ignore_permission) if 'original_publication_date' in validated_data: preprint.original_publication_date = validated_data['original_publication_date'] or None @@ -455,7 +450,7 @@ def update(self, preprint, validated_data): ) self.set_field(preprint.set_published, published, auth, ignore_permission=ignore_permission) recently_published = published - preprint.set_privacy('public', log=False, save=True) + preprint.set_privacy('public', log=False, save=True, ignore_permission=ignore_permission) try: preprint.full_clean() diff --git a/osf/models/preprint.py b/osf/models/preprint.py index 992c93f823e..6693490981d 100644 --- a/osf/models/preprint.py +++ b/osf/models/preprint.py @@ -897,8 +897,9 @@ def set_published(self, published, auth, save=False, **kwargs): if save: self.save() - def set_preprint_license(self, license_detail, auth, save=False): - license_record, license_changed = set_license(self, license_detail, auth, node_type='preprint') + @require_permission([WRITE]) + def set_preprint_license(self, license_detail, auth, save=False, **kwargs): + license_record, license_changed = set_license(self, license_detail, auth, node_type='preprint', **kwargs) if license_changed: self.add_log( diff --git a/website/project/licenses/__init__.py b/website/project/licenses/__init__.py index 69e34744a96..07095936cfe 100644 --- a/website/project/licenses/__init__.py +++ b/website/project/licenses/__init__.py @@ -6,7 +6,7 @@ from osf.utils import permissions -def set_license(node, license_detail, auth, node_type='node'): +def set_license(node, license_detail, auth, node_type='node', ignore_permission=False): NodeLicense = apps.get_model('osf.NodeLicense') NodeLicenseRecord = apps.get_model('osf.NodeLicenseRecord') @@ -26,7 +26,7 @@ def set_license(node, license_detail, auth, node_type='node'): ): return {}, False - if not node.has_permission(auth.user, permissions.WRITE): + if not ignore_permission and not node.has_permission(auth.user, permissions.WRITE): raise framework_exceptions.PermissionsError(f'You need admin or write permissions to change a {node_type}\'s license') try: From 436cd20c08fea83a7a364535be8d021011c25868 Mon Sep 17 00:00:00 2001 From: Vlad0n20 <137097005+Vlad0n20@users.noreply.github.com> Date: Thu, 24 Apr 2025 23:30:17 +0300 Subject: [PATCH 14/23] [ENG-7263] Part 2 (#11110) ## Purpose Fix the errors in the management command ## Changes Remove useless parameter ## Ticket https://openscience.atlassian.net/browse/ENG-7263 --- api/preprints/serializers.py | 7 +++++++ ...unclaimed_records_for_preprint_versions.py | 19 ++++++++++++++----- 2 files changed, 21 insertions(+), 5 deletions(-) diff --git a/api/preprints/serializers.py b/api/preprints/serializers.py index 117547230e4..1d7b0abd014 100644 --- a/api/preprints/serializers.py +++ b/api/preprints/serializers.py @@ -531,6 +531,13 @@ def create(self, validated_data): raise Conflict(detail='Failed to create a new preprint version due to unpublished pending version exists.') if not preprint: raise NotFound(detail='Failed to create a new preprint version due to source preprint not found.') + for contributor in preprint.contributor_set.filter(user__is_registered=False): + contributor.user.add_unclaimed_record( + claim_origin=preprint, + referrer=auth.user, + email=contributor.user.email, + given_name=contributor.user.fullname, + ) if data_to_update: return self.update(preprint, 
data_to_update) return preprint diff --git a/osf/management/commands/fix_unclaimed_records_for_preprint_versions.py b/osf/management/commands/fix_unclaimed_records_for_preprint_versions.py index 655e4b6c039..6ca1cd0e95f 100644 --- a/osf/management/commands/fix_unclaimed_records_for_preprint_versions.py +++ b/osf/management/commands/fix_unclaimed_records_for_preprint_versions.py @@ -22,6 +22,15 @@ def handle(self, *args, **options): dry_run = options.get('dry_run', False) update_unclaimed_records_for_preprint_versions(dry_run=dry_run) +def safe_sort_key(x, delimiter): + parts = x.split(delimiter) + if len(parts) > 1: + try: + return int(parts[1]) + except (ValueError, TypeError): + return 0 + return 0 + def update_unclaimed_records_for_preprint_versions(dry_run=False): Preprint = apps.get_model('osf.Preprint') @@ -70,13 +79,15 @@ def update_unclaimed_records_for_preprint_versions(dry_run=False): latest_version_number = latest_version_through.version unregistered_contributors = preprint.contributor_set.filter(user__is_registered=False) logger.info(f'Found {unregistered_contributors.count()} unregistered contributors for preprint {preprint._id}') - + delimiter = Preprint.GUID_VERSION_DELIMITER for contributor in unregistered_contributors: try: - records_key_for_current_guid = [key for key in contributor.user.unclaimed_records.keys() if guid in key] + records_key_for_current_guid = [ + key for key in contributor.user.unclaimed_records.keys() if guid in key and delimiter in key + ] if records_key_for_current_guid: records_key_for_current_guid.sort( - key=lambda x: int(x.split(Preprint.GUID_VERSION_DELIMITER)[1]), + key=lambda x: safe_sort_key(x, delimiter), ) record_info = contributor.user.unclaimed_records[records_key_for_current_guid[0]] for current_version in range(1, int(latest_version_number) + 1): @@ -101,7 +112,6 @@ def update_unclaimed_records_for_preprint_versions(dry_run=False): referrer=referrer, given_name=record_info.get('name', None), email=record_info.get('email', None), - provided_pid=preprint_id, ) contributor.user.save() updated_count += 1 @@ -127,7 +137,6 @@ def update_unclaimed_records_for_preprint_versions(dry_run=False): referrer=current_preprint.creator, given_name=contributor.user.fullname, email=contributor.user.username, - provided_pid=preprint_id, ) contributor.user.save() updated_count += 1 From 36abc51bb9b06e707c85693ff22482d91a5aab90 Mon Sep 17 00:00:00 2001 From: Vlad0n20 <137097005+Vlad0n20@users.noreply.github.com> Date: Tue, 29 Apr 2025 21:49:04 +0300 Subject: [PATCH 15/23] [ENG-7263] Fix/eng 7263 part 3 (#11119) --- ...unclaimed_records_for_preprint_versions.py | 2 ++ osf/models/user.py | 36 ++++++++++--------- 2 files changed, 21 insertions(+), 17 deletions(-) diff --git a/osf/management/commands/fix_unclaimed_records_for_preprint_versions.py b/osf/management/commands/fix_unclaimed_records_for_preprint_versions.py index 6ca1cd0e95f..17fca6a54df 100644 --- a/osf/management/commands/fix_unclaimed_records_for_preprint_versions.py +++ b/osf/management/commands/fix_unclaimed_records_for_preprint_versions.py @@ -112,6 +112,7 @@ def update_unclaimed_records_for_preprint_versions(dry_run=False): referrer=referrer, given_name=record_info.get('name', None), email=record_info.get('email', None), + skip_referrer_permissions=True ) contributor.user.save() updated_count += 1 @@ -137,6 +138,7 @@ def update_unclaimed_records_for_preprint_versions(dry_run=False): referrer=current_preprint.creator, given_name=contributor.user.fullname, email=contributor.user.username, + 
skip_referrer_permissions=True
                 )
                 contributor.user.save()
                 updated_count += 1
diff --git a/osf/models/user.py b/osf/models/user.py
index fa1d4ec254a..97d444698fd 100644
--- a/osf/models/user.py
+++ b/osf/models/user.py
@@ -1665,35 +1665,37 @@ def n_projects_in_common(self, other_user):
         """Returns number of "shared projects" (projects that both users are contributors or group members for)"""
         return self._projects_in_common_query(other_user).count()

-    def add_unclaimed_record(self, claim_origin, referrer, given_name, email=None):
+    def add_unclaimed_record(self, claim_origin, referrer, given_name, email=None, skip_referrer_permissions=False):
         """Add a new project entry in the unclaimed records dictionary.

         :param object claim_origin: Object this unclaimed user was added to. currently `Node` or `Provider` or `Preprint`
         :param User referrer: User who referred this user.
         :param str given_name: The full name that the referrer gave for this user.
         :param str email: The given email address.
+        :param bool skip_referrer_permissions: If True, skip the referrer permission checks below.
         :returns: The added record
         """
         from .provider import AbstractProvider
         from .osf_group import OSFGroup

-        if isinstance(claim_origin, AbstractProvider):
-            if not bool(get_perms(referrer, claim_origin)):
-                raise PermissionsError(
-                    f'Referrer does not have permission to add a moderator to provider {claim_origin._id}'
-                )
-
-        elif isinstance(claim_origin, OSFGroup):
-            if not claim_origin.has_permission(referrer, MANAGE):
-                raise PermissionsError(
-                    f'Referrer does not have permission to add a member to {claim_origin._id}'
-                )
-        else:
-            if not claim_origin.has_permission(referrer, ADMIN):
-                raise PermissionsError(
-                    f'Referrer does not have permission to add a contributor to {claim_origin._id}'
-                )
+        if not skip_referrer_permissions:
+            if isinstance(claim_origin, AbstractProvider):
+                if not bool(get_perms(referrer, claim_origin)):
+                    raise PermissionsError(
+                        f'Referrer does not have permission to add a moderator to provider {claim_origin._id}'
+                    )
+
+            elif isinstance(claim_origin, OSFGroup):
+                if not claim_origin.has_permission(referrer, MANAGE):
+                    raise PermissionsError(
+                        f'Referrer does not have permission to add a member to {claim_origin._id}'
+                    )
+            else:
+                if not claim_origin.has_permission(referrer, ADMIN):
+                    raise PermissionsError(
+                        f'Referrer does not have permission to add a contributor to {claim_origin._id}'
+                    )

         pid = str(claim_origin._id)
         referrer_id = str(referrer._id)

From dc59b8fc1f95b2876c98e81a7a568b25d28674be Mon Sep 17 00:00:00 2001
From: antkryt
Date: Tue, 29 Apr 2025 21:54:29 +0300
Subject: [PATCH 16/23] [ENG-7716] Allow for reinstatement of previous preprint
 versions (with date uploaded) via the admin app (#11118)

## Purpose

Improve exception handling and apply related minor fixes.

## Changes

- improve exception handling in the admin re-version view
- fix the supplemental node permission error
- set subjects from relationships to keep a valid hierarchy
- optionally ignore unfinished/unpublished versions

## Ticket

https://openscience.atlassian.net/browse/ENG-7716
---
 admin/preprints/views.py     | 12 ++++++++++--
 api/preprints/serializers.py |  2 +-
 osf/models/mixins.py         |  4 ++--
 osf/models/preprint.py       | 38 ++++++++++++++++++++++--------------
 4 files changed, 36 insertions(+), 20 deletions(-)

diff --git a/admin/preprints/views.py b/admin/preprints/views.py
index 36fdb877029..ef7d1860e76 100644
--- a/admin/preprints/views.py
+++ b/admin/preprints/views.py
@@ -24,6 +24,7 @@
 from osf.exceptions import PreprintStateError
 from rest_framework.exceptions import PermissionDenied as DrfPermissionDenied
+from framework.exceptions import PermissionsError

 from osf.management.commands.fix_preprints_has_data_links_and_why_no_data import process_wrong_why_not_data_preprints
 from osf.models import (
@@ -212,18 +213,25 @@ def post(self, request, *args, **kwargs):
                     assign_version_number=1,
                     auth=request,
                     ignore_permission=True,
+                    ignore_existing_versions=True,
                 )
+                data_to_update = data_to_update or dict()
+
                 primary_file = copy_files(preprint.primary_file, target_node=new_preprint, identifier__in=file_versions)
+                if primary_file is None:
+                    raise ValueError(f"Primary file {preprint.primary_file.id} doesn't have the following versions: {file_versions}")  # raising rolls back the transaction
                 data_to_update['primary_file'] = primary_file

                 # FIXME: currently it's not possible to ignore permission when update subjects
                 # via serializer, remove this logic if deprecated
                 subjects = data_to_update.pop('subjects', None)
                 if subjects:
-                    new_preprint.set_subjects([subjects], auth=request, ignore_permission=True)
+                    new_preprint.set_subjects_from_relationships(subjects, auth=request, ignore_permission=True)

                 PreprintSerializer(new_preprint, context={'request': request, 'ignore_permission': True}).update(new_preprint, data_to_update)
-        except DrfPermissionDenied as exc:
+        except ValueError as exc:
+            return HttpResponse(str(exc), status=400)
+        except (PermissionsError, DrfPermissionDenied) as exc:
             return HttpResponse(f'Not enough permissions to perform this action : {str(exc)}', status=400)

         return JsonResponse({'redirect': self.get_success_url(new_preprint._id)})
diff --git a/api/preprints/serializers.py b/api/preprints/serializers.py
index 1d7b0abd014..41e8fee203f 100644
--- a/api/preprints/serializers.py
+++ b/api/preprints/serializers.py
@@ -416,7 +416,7 @@ def update(self, preprint, validated_data):

         if 'node' in validated_data:
             node = validated_data.pop('node', None)
-            self.set_field(preprint.set_supplemental_node, node, auth, ignore_permission=ignore_permission)
+            self.set_field(preprint.set_supplemental_node, node, auth, ignore_node_permissions=ignore_permission, ignore_permission=ignore_permission)

         if 'subjects' in validated_data:
             subjects = validated_data.pop('subjects', None)
diff --git a/osf/models/mixins.py b/osf/models/mixins.py
index 6f1c7f36f6f..facc39af119 100644
--- a/osf/models/mixins.py
+++ b/osf/models/mixins.py
@@ -1191,7 +1191,7 @@ def set_subjects(self, new_subjects, auth, add_log=True, **kwargs):
         if hasattr(self, 'update_search'):
             self.update_search()

-    def set_subjects_from_relationships(self, subjects_list, auth, add_log=True):
+    def set_subjects_from_relationships(self, subjects_list, auth, add_log=True, **kwargs):
         """ Helper for setting M2M subjects field from list of flattened subjects received from UI.
         Only authorized admins may set subjects.
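# A hedged sketch of the copy_files call the admin view above relies on: the
# **version_filters parameter (added to website/files/utils.py earlier in this
# series) turns keyword arguments such as identifier__in into standard Django
# field lookups on src.versions, so only the requested FileVersions are cloned.
# Assumes a file model exposing a `versions` related manager; not a verbatim
# copy of the utility.
def select_file_versions(src, **version_filters):
    fileversions = (
        src.versions
           .filter(**version_filters)       # e.g. identifier__in=['1', '3']
           .select_related('region')
           .order_by('-created')
    )
    # An empty result makes .first() return None, which the view above turns
    # into a 400 response while the surrounding atomic block rolls back.
    return fileversions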
@@ -1201,7 +1201,7 @@ def set_subjects_from_relationships(self, subjects_list, auth, add_log=True): :return: None """ - self.check_subject_perms(auth) + self.check_subject_perms(auth, **kwargs) self.assert_subject_format(subjects_list, expect_list=True, error_msg='Expecting a list of subjects.') if subjects_list: self.assert_subject_format(subjects_list[0], expect_list=False, error_msg='Expecting a list of subjects.') diff --git a/osf/models/preprint.py b/osf/models/preprint.py index 6693490981d..cfe6e56afe6 100644 --- a/osf/models/preprint.py +++ b/osf/models/preprint.py @@ -404,7 +404,7 @@ def check_unfinished_or_unpublished_version(self): return None, None @classmethod - def create_version(cls, create_from_guid, auth, assign_version_number=None, ignore_permission=False): + def create_version(cls, create_from_guid, auth, assign_version_number=None, ignore_permission=False, ignore_existing_versions=False): """Create a new version for a given preprint. `create_from_guid` can be any existing versions of the preprint but `create_version` always finds the latest version and creates a new version from it. In addition, this creates an "incomplete" new preprint version object using the model class and returns both the new object and @@ -425,19 +425,21 @@ def create_version(cls, create_from_guid, auth, assign_version_number=None, igno sentry.log_message(f'ADMIN permission for the latest version is required to create a new version: ' f'[user={auth.user._id}, guid={guid_obj._id}, latest_version={latest_version._id}]') raise PermissionsError - unfinished_version, unpublished_version = latest_version.check_unfinished_or_unpublished_version() - if unpublished_version: - logger.error('Failed to create a new version due to unpublished pending version already exists: ' - f'[version={unpublished_version.version}, ' - f'_id={unpublished_version._id}, ' - f'state={unpublished_version.machine_state}].') - raise UnpublishedPendingPreprintVersionExists - if unfinished_version: - logger.warning(f'Use existing initiated but unfinished version instead of creating a new one: ' - f'[version={unfinished_version.version}, ' - f'_id={unfinished_version._id}, ' - f'state={unfinished_version.machine_state}].') - return unfinished_version, None + if not ignore_existing_versions: + unfinished_version, unpublished_version = latest_version.check_unfinished_or_unpublished_version() + if unpublished_version: + message = ('Failed to create a new version due to unpublished pending version already exists: ' + f'[version={unpublished_version.version}, ' + f'_id={unpublished_version._id}, ' + f'state={unpublished_version.machine_state}].') + logger.error(message) + raise UnpublishedPendingPreprintVersionExists(message) + if unfinished_version: + logger.warning(f'Use existing initiated but unfinished version instead of creating a new one: ' + f'[version={unfinished_version.version}, ' + f'_id={unfinished_version._id}, ' + f'state={unfinished_version.machine_state}].') + return unfinished_version, None # Prepare the data to clone/update data_to_update = { @@ -536,7 +538,13 @@ def create_version(cls, create_from_guid, auth, assign_version_number=None, igno guid_obj.save() if latest_version.node: - preprint.set_supplemental_node(latest_version.node, auth, save=False, ignore_node_permissions=True) + preprint.set_supplemental_node( + latest_version.node, + auth, + save=False, + ignore_node_permissions=True, + ignore_permission=ignore_permission + ) return preprint, data_to_update From 165568754bf4caf4339c9a5ed9e421e2aa56decc Mon Sep 17 
00:00:00 2001 From: Vlad0n20 Date: Tue, 8 Apr 2025 13:58:37 +0300 Subject: [PATCH 17/23] Add new task to update node metadata with verified_links --- .../datacite/datacite_tree_walker.py | 18 ++++++++++++ website/identifiers/tasks.py | 29 +++++++++++++++++++ 2 files changed, 47 insertions(+) diff --git a/osf/metadata/serializers/datacite/datacite_tree_walker.py b/osf/metadata/serializers/datacite/datacite_tree_walker.py index 640e00e76f0..aa02f8193fc 100644 --- a/osf/metadata/serializers/datacite/datacite_tree_walker.py +++ b/osf/metadata/serializers/datacite/datacite_tree_walker.py @@ -113,6 +113,7 @@ def walk(self, doi_override=None): self._visit_rights(self.root) self._visit_descriptions(self.root, self.basket.focus.iri) self._visit_funding_references(self.root) + self._visit_verified_links(self.root) self._visit_related(self.root) def _visit_identifier(self, parent_el, *, doi_override=None): @@ -432,3 +433,20 @@ def _get_resource_type_general(self, focus_iri): if isinstance(type_term, rdflib.URIRef) and type_term.startswith(DATACITE): return without_namespace(type_term, DATACITE) return 'Text' + + def _visit_verified_links(self, parent_el): + osf_item = self.basket.focus.dbmodel + verified_links = getattr(osf_item, 'verified_links', None) + resource_type = getattr(osf_item, 'link_resource_type', 'Text') + + if verified_links and isinstance(verified_links, list): + related_identifiers_el = self.visit(parent_el, 'relatedIdentifiers', is_list=True) + for link in verified_links: + if link and isinstance(link, str) and smells_like_iri(link): + self.visit(related_identifiers_el, 'relatedIdentifier', text=link, attrib={ + 'relatedIdentifierType': 'URL', + 'relationType': 'IsReferencedBy', + 'resourceTypeGeneral': resource_type + }) + else: + logger.warning('skipping non-URL verified link "%s"', link) diff --git a/website/identifiers/tasks.py b/website/identifiers/tasks.py index f940956d54b..5c10b59a733 100644 --- a/website/identifiers/tasks.py +++ b/website/identifiers/tasks.py @@ -17,3 +17,32 @@ def task__update_doi_metadata_on_change(self, target_guid): @celery_app.task(ignore_results=True) def update_doi_metadata_on_change(target_guid): task__update_doi_metadata_on_change(target_guid) + +@celery_app.task(bind=True, max_retries=5, acks_late=True) +def task__update_doi_metadata_with_verified_links(self, target_guid, verified_links=None, link_resource_type='Text'): + sentry.log_message('Updating DOI with verified links for guid', + extra_data={'guid': target_guid, 'verified_links': verified_links, 'link_resource_type': link_resource_type}, + level=logging.INFO) + + Guid = apps.get_model('osf.Guid') + target_object = Guid.load(target_guid).referent + try: + if verified_links is not None: + target_object.verified_links = verified_links + target_object.link_resource_type = link_resource_type + + target_object.request_identifier_update(category='doi') + + sentry.log_message('DOI metadata with verified links updated for guid', + extra_data={'guid': target_guid}, + level=logging.INFO) + except Exception as exc: + sentry.log_message('Failed to update DOI metadata with verified links', + extra_data={'guid': target_guid, 'error': str(exc)}, + level=logging.ERROR) + raise self.retry(exc=exc) + +@queued_task +@celery_app.task(ignore_results=True) +def update_doi_metadata_with_verified_links(target_guid, verified_links=None, resource_type='Text'): + task__update_doi_metadata_with_verified_links.apply_async((target_guid, verified_links, resource_type)) From daa2b76255a0f189416712d6bd5fa3d2f8237d4d Mon Sep 
17 00:00:00 2001 From: Vlad0n20 Date: Tue, 29 Apr 2025 17:14:32 +0300 Subject: [PATCH 18/23] Update task to change node metadata to use gv request to get verified links --- .../datacite/datacite_tree_walker.py | 43 ++++++++++--------- osf/models/node.py | 4 ++ website/identifiers/clients/datacite.py | 3 ++ website/identifiers/tasks.py | 11 ++--- 4 files changed, 34 insertions(+), 27 deletions(-) diff --git a/osf/metadata/serializers/datacite/datacite_tree_walker.py b/osf/metadata/serializers/datacite/datacite_tree_walker.py index aa02f8193fc..4660ee7ba4f 100644 --- a/osf/metadata/serializers/datacite/datacite_tree_walker.py +++ b/osf/metadata/serializers/datacite/datacite_tree_walker.py @@ -113,8 +113,7 @@ def walk(self, doi_override=None): self._visit_rights(self.root) self._visit_descriptions(self.root, self.basket.focus.iri) self._visit_funding_references(self.root) - self._visit_verified_links(self.root) - self._visit_related(self.root) + self._visit_related_and_verified_links(self.root) def _visit_identifier(self, parent_el, *, doi_override=None): if doi_override is None: @@ -374,13 +373,16 @@ def _visit_related_identifier_and_item(self, identifier_parent_el, item_parent_e self._visit_publication_year(related_item_el, related_iri) self._visit_publisher(related_item_el, related_iri) - def _visit_related(self, parent_el): + def _visit_related_and_verified_links(self, parent_el): relation_pairs = set() for relation_iri, datacite_relation in RELATED_IDENTIFIER_TYPE_MAP.items(): for related_iri in self.basket[relation_iri]: relation_pairs.add((datacite_relation, related_iri)) + related_identifiers_el = self.visit(parent_el, 'relatedIdentifiers', is_list=True) related_items_el = self.visit(parent_el, 'relatedItems', is_list=True) + + # First add regular related identifiers for datacite_relation, related_iri in sorted(relation_pairs): self._visit_related_identifier_and_item( related_identifiers_el, @@ -389,6 +391,24 @@ def _visit_related(self, parent_el): datacite_relation, ) + # Then add verified links to same relatedIdentifiers element + osf_item = self.basket.focus.dbmodel + from osf.models import AbstractNode + + if isinstance(osf_item, AbstractNode): + gv_verified_link_list = osf_item.get_links() + for item in gv_verified_link_list: + verified_link, resource_type = item.get('target_url', None), item.get('resource_type', None) + if verified_link and resource_type: + if verified_link and isinstance(verified_link, str) and smells_like_iri(verified_link): + self.visit(related_identifiers_el, 'relatedIdentifier', text=verified_link, attrib={ + 'relatedIdentifierType': 'URL', + 'relationType': 'IsReferencedBy', + 'resourceTypeGeneral': resource_type.title() + }) + else: + logger.warning('skipping non-URL verified link "%s"', verified_link) + def _visit_name_identifiers(self, parent_el, agent_iri): for identifier in sorted(self.basket[agent_iri:DCTERMS.identifier]): identifier_type, identifier_value = self._identifier_type_and_value(identifier) @@ -433,20 +453,3 @@ def _get_resource_type_general(self, focus_iri): if isinstance(type_term, rdflib.URIRef) and type_term.startswith(DATACITE): return without_namespace(type_term, DATACITE) return 'Text' - - def _visit_verified_links(self, parent_el): - osf_item = self.basket.focus.dbmodel - verified_links = getattr(osf_item, 'verified_links', None) - resource_type = getattr(osf_item, 'link_resource_type', 'Text') - - if verified_links and isinstance(verified_links, list): - related_identifiers_el = self.visit(parent_el, 'relatedIdentifiers', 
is_list=True) - for link in verified_links: - if link and isinstance(link, str) and smells_like_iri(link): - self.visit(related_identifiers_el, 'relatedIdentifier', text=link, attrib={ - 'relatedIdentifierType': 'URL', - 'relationType': 'IsReferencedBy', - 'resourceTypeGeneral': resource_type - }) - else: - logger.warning('skipping non-URL verified link "%s"', link) diff --git a/osf/models/node.py b/osf/models/node.py index 5c60e3f0a3c..db1950827f8 100644 --- a/osf/models/node.py +++ b/osf/models/node.py @@ -1998,6 +1998,10 @@ def on_update(self, first_save, saved_fields): request_headers = string_type_request_headers(request) self.update_or_enqueue_on_node_updated(user_id, first_save, saved_fields) + from website.identifiers.tasks import update_doi_metadata_with_verified_links + if self.get_identifier('doi') and bool(self.IDENTIFIER_UPDATE_FIELDS.intersection(set(saved_fields))): + update_doi_metadata_with_verified_links(self._id) + user = User.load(user_id) if user: # Specifically call the super class save method to avoid recursion into model save method. diff --git a/website/identifiers/clients/datacite.py b/website/identifiers/clients/datacite.py index 64c9b0a075b..66bb2f924b6 100644 --- a/website/identifiers/clients/datacite.py +++ b/website/identifiers/clients/datacite.py @@ -57,6 +57,9 @@ def create_identifier(self, node, category, doi_value=None): doi_value = doi_value or self._get_doi_value(node) metadata_record_xml = self.build_metadata(node, doi_value, as_xml=True) if settings.DATACITE_ENABLED: + if isinstance(metadata_record_xml, bytes): + metadata_record_xml = metadata_record_xml.decode('utf-8') + resp = self._client.metadata_post(metadata_record_xml) # Typical response: 'OK (10.70102/FK2osf.io/cq695)' to doi 10.70102/FK2osf.io/cq695 doi = re.match(r'OK \((?P[a-zA-Z0-9 .\/]{0,})\)', resp).groupdict()['doi'] diff --git a/website/identifiers/tasks.py b/website/identifiers/tasks.py index 5c10b59a733..277df0c5b53 100644 --- a/website/identifiers/tasks.py +++ b/website/identifiers/tasks.py @@ -19,17 +19,14 @@ def update_doi_metadata_on_change(target_guid): task__update_doi_metadata_on_change(target_guid) @celery_app.task(bind=True, max_retries=5, acks_late=True) -def task__update_doi_metadata_with_verified_links(self, target_guid, verified_links=None, link_resource_type='Text'): +def task__update_doi_metadata_with_verified_links(self, target_guid): sentry.log_message('Updating DOI with verified links for guid', - extra_data={'guid': target_guid, 'verified_links': verified_links, 'link_resource_type': link_resource_type}, + extra_data={'guid': target_guid}, level=logging.INFO) Guid = apps.get_model('osf.Guid') target_object = Guid.load(target_guid).referent try: - if verified_links is not None: - target_object.verified_links = verified_links - target_object.link_resource_type = link_resource_type target_object.request_identifier_update(category='doi') @@ -44,5 +41,5 @@ def task__update_doi_metadata_with_verified_links(self, target_guid, verified_li @queued_task @celery_app.task(ignore_results=True) -def update_doi_metadata_with_verified_links(target_guid, verified_links=None, resource_type='Text'): - task__update_doi_metadata_with_verified_links.apply_async((target_guid, verified_links, resource_type)) +def update_doi_metadata_with_verified_links(target_guid): + task__update_doi_metadata_with_verified_links(target_guid) From b309399932c8d10002ec65f414fd49d69840bb4b Mon Sep 17 00:00:00 2001 From: Vlad0n20 Date: Thu, 1 May 2025 15:12:28 +0300 Subject: [PATCH 19/23] Add mock for gravy 
valet get_links to fix failed tests

---
 api_tests/identifiers/managment_commands/test_sync_dois.py | 2 +-
 conftest.py                                                | 7 +++++++
 2 files changed, 8 insertions(+), 1 deletion(-)

diff --git a/api_tests/identifiers/managment_commands/test_sync_dois.py b/api_tests/identifiers/managment_commands/test_sync_dois.py
index e72893dc7dd..c36e29080e0 100644
--- a/api_tests/identifiers/managment_commands/test_sync_dois.py
+++ b/api_tests/identifiers/managment_commands/test_sync_dois.py
@@ -58,7 +58,7 @@ def test_doi_synced_datacite(self, app, registration, registration_identifier, m
         assert registration_identifier.modified.date() < datetime.datetime.now().date()

         call_command('sync_doi_metadata', f'-m={datetime.datetime.now()}')
-        assert len(mock_datacite.calls) == 2
+        assert len(mock_datacite.calls) == 3
         update_metadata, update_doi = mock_datacite.calls
         assert update_metadata.request.url == f'{settings.DATACITE_URL}/metadata'
         assert update_doi.request.url == f'{settings.DATACITE_URL}/doi'
diff --git a/conftest.py b/conftest.py
index 6f870093ed4..c183b5faac0 100644
--- a/conftest.py
+++ b/conftest.py
@@ -357,3 +357,10 @@ def helpful_thing(self):
     ```
     """
     yield from rolledback_transaction('function_transaction')
+
+@pytest.fixture(autouse=True)
+def mock_gravy_valet_get_links():
+
+    with mock.patch('osf.external.gravy_valet.translations.get_links') as mock_get_links:
+        mock_get_links.return_value = []
+        yield mock_get_links

From 09df0f1c62cdc6941ed9bd91112ab2b19fc6b78a Mon Sep 17 00:00:00 2001
From: Vlad0n20
Date: Thu, 1 May 2025 15:52:01 +0300
Subject: [PATCH 20/23] Fix test_doi_synced_datacite test

---
 api_tests/identifiers/managment_commands/test_sync_dois.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/api_tests/identifiers/managment_commands/test_sync_dois.py b/api_tests/identifiers/managment_commands/test_sync_dois.py
index c36e29080e0..e72893dc7dd 100644
--- a/api_tests/identifiers/managment_commands/test_sync_dois.py
+++ b/api_tests/identifiers/managment_commands/test_sync_dois.py
@@ -58,7 +58,7 @@ def test_doi_synced_datacite(self, app, registration, registration_identifier, m
         assert registration_identifier.modified.date() < datetime.datetime.now().date()

         call_command('sync_doi_metadata', f'-m={datetime.datetime.now()}')
-        assert len(mock_datacite.calls) == 3
+        assert len(mock_datacite.calls) == 2
         update_metadata, update_doi = mock_datacite.calls
         assert update_metadata.request.url == f'{settings.DATACITE_URL}/metadata'
         assert update_doi.request.url == f'{settings.DATACITE_URL}/doi'

From dfdf1566eca31fbbc6b0c61780a3b0c201adfa0e Mon Sep 17 00:00:00 2001
From: Vlad0n20
Date: Mon, 5 May 2025 14:25:22 +0300
Subject: [PATCH 21/23] Update PR according to comments

---
 conftest.py                              |  2 +-
 .../datacite/datacite_tree_walker.py     | 13 +++++++++----
 osf/models/identifiers.py                |  4 ++--
 website/identifiers/clients/datacite.py  |  2 --
 website/identifiers/tasks.py             | 19 ++++++++-----------
 5 files changed, 20 insertions(+), 20 deletions(-)

diff --git a/conftest.py b/conftest.py
index c183b5faac0..a44c3c20739 100644
--- a/conftest.py
+++ b/conftest.py
@@ -361,6 +361,6 @@ def helpful_thing(self):
 @pytest.fixture(autouse=True)
 def mock_gravy_valet_get_links():

-    with mock.patch('osf.external.gravy_valet.translations.get_links') as mock_get_links:
+    with mock.patch('osf.external.gravy_valet.translations.get_verified_links') as mock_get_links:
         mock_get_links.return_value = []
         yield mock_get_links
diff --git a/osf/metadata/serializers/datacite/datacite_tree_walker.py 
b/osf/metadata/serializers/datacite/datacite_tree_walker.py index 4660ee7ba4f..7e772b1666b 100644 --- a/osf/metadata/serializers/datacite/datacite_tree_walker.py +++ b/osf/metadata/serializers/datacite/datacite_tree_walker.py @@ -6,7 +6,9 @@ import rdflib +from framework import sentry from osf.exceptions import MetadataSerializationError +from osf.external.gravy_valet.request_helpers import get_verified_links from osf.metadata import gather from osf.metadata.rdfutils import ( RDF, @@ -396,18 +398,21 @@ def _visit_related_and_verified_links(self, parent_el): from osf.models import AbstractNode if isinstance(osf_item, AbstractNode): - gv_verified_link_list = osf_item.get_links() + gv_verified_link_list = get_verified_links(node_guid=osf_item._id) + non_url_verified_links = [] for item in gv_verified_link_list: - verified_link, resource_type = item.get('target_url', None), item.get('resource_type', None) + verified_link, resource_type = item.attributes.get('target_url', None), item.attributes.get('resource_type', None) if verified_link and resource_type: - if verified_link and isinstance(verified_link, str) and smells_like_iri(verified_link): + if smells_like_iri(verified_link): self.visit(related_identifiers_el, 'relatedIdentifier', text=verified_link, attrib={ 'relatedIdentifierType': 'URL', 'relationType': 'IsReferencedBy', 'resourceTypeGeneral': resource_type.title() }) else: - logger.warning('skipping non-URL verified link "%s"', verified_link) + non_url_verified_links.append(verified_link) + if non_url_verified_links: + sentry.log_message(f'Skipped - {','.join(non_url_verified_links)} for node {osf_item._id}') def _visit_name_identifiers(self, parent_el, agent_iri): for identifier in sorted(self.basket[agent_iri:DCTERMS.identifier]): diff --git a/osf/models/identifiers.py b/osf/models/identifiers.py index 49be9f9c45b..9383bc61484 100644 --- a/osf/models/identifiers.py +++ b/osf/models/identifiers.py @@ -86,8 +86,8 @@ def request_identifier(self, category): def request_identifier_update(self, category, create=False): '''Noop if no existing identifier value for the category.''' - if not self.get_identifier_value(category) and not create: - return + # if not self.get_identifier_value(category) and not create: + # return client = self.get_doi_client() if client: diff --git a/website/identifiers/clients/datacite.py b/website/identifiers/clients/datacite.py index 66bb2f924b6..be4e4197c7b 100644 --- a/website/identifiers/clients/datacite.py +++ b/website/identifiers/clients/datacite.py @@ -57,8 +57,6 @@ def create_identifier(self, node, category, doi_value=None): doi_value = doi_value or self._get_doi_value(node) metadata_record_xml = self.build_metadata(node, doi_value, as_xml=True) if settings.DATACITE_ENABLED: - if isinstance(metadata_record_xml, bytes): - metadata_record_xml = metadata_record_xml.decode('utf-8') resp = self._client.metadata_post(metadata_record_xml) # Typical response: 'OK (10.70102/FK2osf.io/cq695)' to doi 10.70102/FK2osf.io/cq695 diff --git a/website/identifiers/tasks.py b/website/identifiers/tasks.py index 277df0c5b53..c00a3629b2e 100644 --- a/website/identifiers/tasks.py +++ b/website/identifiers/tasks.py @@ -1,10 +1,13 @@ import logging from django.apps import apps +from osf.external.gravy_valet.exceptions import GVException from framework.celery_tasks import app as celery_app from framework.celery_tasks.handlers import queued_task from framework import sentry +logger = logging.getLogger(__name__) + @celery_app.task(bind=True, max_retries=5, acks_late=True) def 
task__update_doi_metadata_on_change(self, target_guid): sentry.log_message('Updating DOI for guid', extra_data={'guid': target_guid}, level=logging.INFO) @@ -20,9 +23,7 @@ def update_doi_metadata_on_change(target_guid): @celery_app.task(bind=True, max_retries=5, acks_late=True) def task__update_doi_metadata_with_verified_links(self, target_guid): - sentry.log_message('Updating DOI with verified links for guid', - extra_data={'guid': target_guid}, - level=logging.INFO) + logger.info(f'Updating DOI with verified links for guid - {target_guid}') Guid = apps.get_model('osf.Guid') target_object = Guid.load(target_guid).referent @@ -30,14 +31,10 @@ def task__update_doi_metadata_with_verified_links(self, target_guid): target_object.request_identifier_update(category='doi') - sentry.log_message('DOI metadata with verified links updated for guid', - extra_data={'guid': target_guid}, - level=logging.INFO) - except Exception as exc: - sentry.log_message('Failed to update DOI metadata with verified links', - extra_data={'guid': target_guid, 'error': str(exc)}, - level=logging.ERROR) - raise self.retry(exc=exc) + logger.info(f'DOI metadata with verified links updated for guid - {target_guid}') + except GVException as e: + logger.info(f'Failed to update DOI metadata with verified links for guid - {target_guid}') + raise self.retry(exc=e) @queued_task @celery_app.task(ignore_results=True) From b0281661e2db18d030236e1e69e857bc94d56f8c Mon Sep 17 00:00:00 2001 From: Vlad0n20 Date: Mon, 5 May 2025 16:22:57 +0300 Subject: [PATCH 22/23] Add handling empty response from gv --- .../datacite/datacite_tree_walker.py | 31 ++++++++++--------- 1 file changed, 16 insertions(+), 15 deletions(-) diff --git a/osf/metadata/serializers/datacite/datacite_tree_walker.py b/osf/metadata/serializers/datacite/datacite_tree_walker.py index 7e772b1666b..1e3d7694522 100644 --- a/osf/metadata/serializers/datacite/datacite_tree_walker.py +++ b/osf/metadata/serializers/datacite/datacite_tree_walker.py @@ -398,21 +398,22 @@ def _visit_related_and_verified_links(self, parent_el): from osf.models import AbstractNode if isinstance(osf_item, AbstractNode): - gv_verified_link_list = get_verified_links(node_guid=osf_item._id) - non_url_verified_links = [] - for item in gv_verified_link_list: - verified_link, resource_type = item.attributes.get('target_url', None), item.attributes.get('resource_type', None) - if verified_link and resource_type: - if smells_like_iri(verified_link): - self.visit(related_identifiers_el, 'relatedIdentifier', text=verified_link, attrib={ - 'relatedIdentifierType': 'URL', - 'relationType': 'IsReferencedBy', - 'resourceTypeGeneral': resource_type.title() - }) - else: - non_url_verified_links.append(verified_link) - if non_url_verified_links: - sentry.log_message(f'Skipped - {','.join(non_url_verified_links)} for node {osf_item._id}') + gv_verified_link_list = list(get_verified_links(node_guid=osf_item._id)) + if gv_verified_link_list: + non_url_verified_links = [] + for item in gv_verified_link_list: + verified_link, resource_type = item.attributes.get('target_url', None), item.attributes.get('resource_type', None) + if verified_link and resource_type: + if smells_like_iri(verified_link): + self.visit(related_identifiers_el, 'relatedIdentifier', text=verified_link, attrib={ + 'relatedIdentifierType': 'URL', + 'relationType': 'IsReferencedBy', + 'resourceTypeGeneral': resource_type.title() + }) + else: + non_url_verified_links.append(verified_link) + if non_url_verified_links: + sentry.log_message(f'Skipped - 
{','.join(non_url_verified_links)} for node {osf_item._id}') def _visit_name_identifiers(self, parent_el, agent_iri): for identifier in sorted(self.basket[agent_iri:DCTERMS.identifier]): From 01a7438bf4a5d19d125ed9dec8df1c3af27f1eae Mon Sep 17 00:00:00 2001 From: Vlad0n20 Date: Mon, 5 May 2025 16:52:40 +0300 Subject: [PATCH 23/23] Add handling empty response from gv --- conftest.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/conftest.py b/conftest.py index a44c3c20739..eefd8ee6e3d 100644 --- a/conftest.py +++ b/conftest.py @@ -360,7 +360,6 @@ def helpful_thing(self): @pytest.fixture(autouse=True) def mock_gravy_valet_get_links(): - - with mock.patch('osf.external.gravy_valet.translations.get_verified_links') as mock_get_links: - mock_get_links.return_value = [] + with mock.patch('osf.external.gravy_valet.request_helpers.iterate_gv_results') as mock_get_links: + mock_get_links.return_value = iter([]) yield mock_get_links
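
# A hedged sketch of how an individual test could override the autouse fixture
# above when it needs a non-empty Gravy Valet response. The attribute names
# mirror what the datacite_tree_walker hunks read (item.attributes with
# 'target_url' and 'resource_type'); whether iterate_gv_results yields such
# objects directly is an assumption, not something this series confirms.
from unittest import mock
import pytest

@pytest.fixture
def mock_gravy_valet_one_link(mock_gravy_valet_get_links):
    link = mock.Mock()
    link.attributes = {'target_url': 'https://example.org/data', 'resource_type': 'dataset'}
    # the walker calls .title() on resource_type, so this would serialize as
    # resourceTypeGeneral="Dataset" in the DataCite relatedIdentifier element
    mock_gravy_valet_get_links.return_value = iter([link])
    return mock_gravy_valet_get_links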