* identifiers: integrate internal_id

* identifiers: distinguish inspire id from legacy cds
* identifiers: integrate internal_id
* chore: fix tests
* installation: upgrade invenio-vocabularies
CERNDocumentServer · Dec 12, 2024 · ccc25e5 · ccc25e5
1 parent ae2d9d2
commit ccc25e5
Show file tree

Hide file tree

Showing 8 changed files with 233 additions and 174 deletions.
diff --git a/Pipfile.lock b/Pipfile.lock
diff --git a/invenio.cfg b/invenio.cfg
@@ -37,7 +37,9 @@ from invenio_cern_sync.users.profile import CERNUserProfileSchema
 from invenio_oauthclient.views.client import auto_redirect_login
 from invenio_cern_sync.sso import cern_remote_app_name, cern_keycloak
 
-from invenio_vocabularies.config import VOCABULARIES_NAMES_SCHEMES as DEFAULT_VOCABULARIES_NAMES_SCHEMES
+from invenio_vocabularies.config import \
+    VOCABULARIES_NAMES_SCHEMES as DEFAULT_VOCABULARIES_NAMES_SCHEMES
+
 
 def _(x):  # needed to avoid start time failure with lazy strings
     return x
@@ -228,7 +230,6 @@ CERN_SYNC_KEYCLOAK_BASE_URL = "https://auth.cern.ch/"
 CERN_SYNC_AUTHZ_BASE_URL = "https://authorization-service-api.web.cern.ch/"
 INVENIO_CERN_SYNC_KEYCLOAK_BASE_URL = "https://auth.cern.ch/"  # set env var when testing
 
-
 OAUTHCLIENT_CERN_REALM_URL = cern_keycloak.realm_url
 OAUTHCLIENT_CERN_USER_INFO_URL = cern_keycloak.user_info_url
 OAUTHCLIENT_CERN_VERIFY_EXP = True
@@ -286,7 +287,7 @@ CDS_EOS_OFFLOAD_REDIRECT_BASE_PATH = ""
 
 # CDS Migration
 CDS_REDIRECTION_COLLECTIONS_MAPPING = {
-    "CERN Students Projects": "", # uuid
+    "CERN Students Projects": "",  # uuid
 }
 
 RDM_PERMISSION_POLICY = CDSRDMRecordPermissionPolicy
@@ -476,8 +477,8 @@ RDM_CUSTOM_FIELDS_UI = [
         ]
     }
 ]
-RDM_FILES_DEFAULT_QUOTA_SIZE = 50 * 10**9     # 50GB
-RDM_FILES_DEFAULT_MAX_FILE_SIZE = 50 * 10**9  # 50GB
+RDM_FILES_DEFAULT_QUOTA_SIZE = 50 * 10 ** 9  # 50GB
+RDM_FILES_DEFAULT_MAX_FILE_SIZE = 50 * 10 ** 9  # 50GB
 
 JOBS_ADMINISTRATION_ENABLED = True
 
@@ -494,22 +495,28 @@ RDM_RECORDS_IDENTIFIERS_SCHEMES = {**RDM_RECORDS_IDENTIFIERS_SCHEMES,
 
 RDM_RECORDS_PERSONORG_SCHEMES = {**RDM_RECORDS_PERSONORG_SCHEMES,
                                  **{"inspire": {"label": _("Inspire"),
-                                                "validator": schemes.is_inspire,
-                                                "datacite": "INSPIRE"}}}
+                                                "validator": schemes.is_inspire_author,
+                                                "datacite": "INSPIRE"},
+                                    "lcds": {"label": _("CDS"),
+                                             "validator": schemes.is_legacy_cds,
+                                             "datacite": "CDS"}
+                                    }
+                                 }
 
 ### Do not require DOIs for record and parent
 # RDM_PERSISTENT_IDENTIFIERS["doi"]["required"] = False
 # RDM_PARENT_PERSISTENT_IDENTIFIERS["doi"]["required"] = False
 
-
 # Invenio-Preservation-Sync
 # =========================
 
 PRESERVATION_SYNC_ENABLED = True
 
+
 def resolve_record_pid(pid):
     return record_service.record_cls.pid.resolve(pid).id
 
+
 PRESERVATION_SYNC_PID_RESOLVER = resolve_record_pid
 
 PRESERVATION_SYNC_PERMISSION_POLICY = CDSRDMPreservationSyncPermissionPolicy
@@ -529,6 +536,9 @@ APP_RDM_RECORD_LANDING_PAGE_EXTERNAL_LINKS = [
 ]
 VOCABULARIES_NAMES_SCHEMES = {
     **DEFAULT_VOCABULARIES_NAMES_SCHEMES,
-    "cern": {"label": _("CERN"), "validator": schemes.is_cern, "datacite": "CERN"},
+    "inspire": {"label": _("Inspire"),
+                "validator": schemes.is_inspire_author,
+                "datacite": "INSPIRE"},
+    "lcds": {"label": _("CDS"), "validator": schemes.is_legacy_cds, "datacite": "CDS"},
 }
 """Names allowed identifier schemes."""
diff --git a/site/cds_rdm/schemes.py b/site/cds_rdm/schemes.py
@@ -7,7 +7,7 @@
 # under the terms of the MIT License; see LICENSE file for more details.
 #
 
-"""CDS speficic identifier schemes."""
+"""CDS specific identifier schemes."""
 
 import re
 
@@ -23,6 +23,8 @@ def cds_reference_number():
 
 
 aleph_regexp = re.compile(r"\d+CER$", flags=re.I)
+inspire_regexp = re.compile(r"\d+$", flags=re.I)
+inspire_author_regexp = re.compile(r"INSPIRE-\d+$", flags=re.I)
 
 
 def is_aleph(val):
@@ -44,18 +46,20 @@ def aleph():
     }
 
 
-inspire_regexp = re.compile(r"\d+$", flags=re.I)
-
-
 def is_inspire(val):
-    """Test if argument is a PubMed ID.
+    """Test if argument is an inspire ID.
 
-    Warning: PMID are just integers, with no structure, so this function will
-    say any integer is a PubMed ID
+    Warning: INSPIRE IDs are just integers, with no structure, so this function will
+    say any integer is an INSPIRE id
     """
     return inspire_regexp.match(val)
 
 
+def is_inspire_author(val):
+    """Test if argument is an inspire author ID."""
+    return inspire_author_regexp.match(val)
+
+
 def inspire():
     """Define validator for inspire."""
     return {
@@ -66,17 +70,17 @@ def inspire():
     }
 
 
-def is_cern(val):
-    """Test if argument is a valid CERN person ID."""
-    pattern = r"^\d+$"
-    return bool(re.match(pattern, val))
-
-
-def cern_person_id():
-    """Define validator for CERN person ID."""
+def inspire_author():
+    """Define validator for inspire author."""
     return {
-        "validator": is_cern,
+        "validator": is_inspire_author,
         "normalizer": lambda value: value,
-        "filter": [""],
+        "filter": ["inspire"],
         "url_generator": None,
     }
+
+
+def is_legacy_cds(val):
+    """Test if argument is a valid legacy CDS identifier (digits only)."""
+    pattern = r"^\d+$"
+    return bool(re.match(pattern, val))