Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

names: remove internal id from sync #300

Merged
merged 1 commit into from
Jan 7, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 6 additions & 2 deletions site/cds_rdm/tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -113,7 +113,7 @@ def sync_local_accounts_to_names(since=None, user_id=None):
orcid = user.user_profile.get("orcid")
try:
# 1. Prefer ORCID: if the user has an ORCID, we update the ORCID name with the
# CERN user info, and we unlist any previous CERN-only name
# CERN user info, and we also update and unlist any previous CERN-only name
current_app.logger.debug(
f"Names sync | Fetching ORCID name for user {user.id}."
)
Expand All @@ -125,7 +125,11 @@ def sync_local_accounts_to_names(since=None, user_id=None):
f"Names sync | Unlisting CERN name for user {user.id}."
)
names_utils.update_name(user, cern_name, unlist=True, uow=uow)

else:
current_app.logger.debug(
f"Names sync | No CERN name found for user {user.id}. Creating new unlisted name."
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

sorry I am a bit confused by the message. What is the use case for creating unlisted CERN name? is it when we have orcid entry but no related CERN user was found? why to create a CERN name at all?
how I understand the case when it is not needed to create unlisted:

  1. I get an ORCID entry (from the dump). Most likely not a CERN researcher
  2. I search for CERN user, I find none, because it is not a CERN person
  3. I create a CERN name anyway, unlisted?

please let if I understood incorrectly

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Based on the discussion we had (we didn't specify this concrete case in the issue but the third point more or less describes it), I remember that we agreed to remove the person id from the internal_id but we keep the name when syncing it with ORCID value to have some sort of "history" and I guess it will be required for the migration, as we will match the unlisted value and not the ORCID value since there is no person id there anymore.

From now on, every time we sync if we find the ORCID value we create the unlisted CERN value as well, before it was simply merging to the ORCID value.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ok I think I understand now.
My initial wrong assumption was that ORCID dump is the source of truth, but now I realise you iterate on user profiles. Makes sense now.
Can you add a comment above this log message, explaining why we do this unlisted name creation?

)
names_utils.create_new_name(user, unlist=True, uow=uow)
current_app.logger.debug(
f"Names sync | Updating ORCID name for user {user.id}."
)
Expand Down
22 changes: 4 additions & 18 deletions site/cds_rdm/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -108,21 +108,6 @@ def add_or_update_email(self, user, name, updated_name, updated=False):
updated = True
return updated

def add_person_id(self, user, name, updated_name, updated=False):
"""Adds the person id to the name.

:param user: The user object.
:param name: The name dictionary.
:param updated_name: The updated name dictionary.
:param updated: If the name has already been updated.
:return: Boolean indicating if the name was updated.
"""
person_id = user.user_profile.get("person_id")
if person_id and not name.get("internal_id"):
updated_name["internal_id"] = person_id
updated = True
return updated

def check_if_update_needed(self, user, name, unlist=False):
"""Check if the name needs to be updated.

Expand All @@ -148,7 +133,6 @@ def check_if_update_needed(self, user, name, unlist=False):
update_functions = [
self.add_affiliations,
self.add_orcid,
self.add_person_id,
self.add_or_update_props,
self.add_or_update_email,
]
Expand Down Expand Up @@ -182,7 +166,7 @@ def update_name(self, user, name_dict, unlist=False, uow=None):
except ValidationError as e:
current_app.logger.error(f"Error updating name for user {user.id}: {e}")

def create_new_name(self, user, uow=None):
def create_new_name(self, user, unlist=False, uow=None):
"""Creates a new name for the user.

:param user: The user object.
Expand All @@ -191,7 +175,6 @@ def create_new_name(self, user, uow=None):
default_props = self.get_default_props(user.id)
name = {
"id": str(user.user_profile["person_id"]),
"internal_id": str(user.user_profile["person_id"]),
"props": default_props,
"identifiers": [],
}
Expand All @@ -217,6 +200,9 @@ def create_new_name(self, user, uow=None):
if user.email:
name["props"]["email"] = user.email

if unlist:
name["tags"] = ["unlisted"]

try:
self.service.create(system_identity, name, uow=uow)
except ValidationError as e:
Expand Down
9 changes: 6 additions & 3 deletions site/tests/test_merge_names.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,11 @@ def test_sync_name_with_existing_orcid(app, database, user_3, name_user_3):
Name.index.refresh()

names = service.scan(system_identity)
assert len(list(names.hits)) == 1
# Creates the CDS name unlisted and the ORCID name
assert len(list(names.hits)) == 2

cds_name = service.read(system_identity, user_3.user_profile["person_id"])
assert cds_name.data["tags"] == ["unlisted"]

filter = dsl.Q(
"bool",
Expand All @@ -40,9 +44,8 @@ def test_sync_name_with_existing_orcid(app, database, user_3, name_user_3):
],
)

# Since the ORCID value is present no CDS name is created but the user data is merged to the ORCID one
os_name = service.search(system_identity, extra_filter=filter)
assert os_name.total == 1
assert os_name.total == 2

name = service.read(system_identity, id_)

Expand Down
10 changes: 7 additions & 3 deletions site/tests/test_tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,8 @@ def test_sync_name_with_existing_orcid(app, database, user_3, name_user_3):
Name.index.refresh()

names = service.scan(system_identity)
assert len(list(names.hits)) == 4
# There should be user 1, user 1 orcid value, user 2, user 3 and user 3 orcid value
assert len(list(names.hits)) == 5

filter = dsl.Q(
"bool",
Expand All @@ -85,9 +86,12 @@ def test_sync_name_with_existing_orcid(app, database, user_3, name_user_3):
],
)

# Since the ORCID value is present no CDS name is created but the user data is merged to the ORCID one
# The ORCID value is present but the CDS name is created anyway unlisted
os_name = service.search(system_identity, extra_filter=filter)
assert os_name.total == 1
assert os_name.total == 2

cds_name = service.read(system_identity, user_3.user_profile["person_id"])
assert cds_name.data["tags"] == ["unlisted"]

name = service.read(system_identity, id_)

Expand Down