Skip to content

Commit

Permalink
feat: ✨ doi registration (#9)
Browse files Browse the repository at this point in the history
* 🚧 wip: add doi registration

* 🚨 chore: appease the linter

* 🎨 style: fix code style issues with Black

* ✨ feat: add doi generation from test.datacite

* 🚧 wip: create payload to register datasets doi

* chore: generate mermaid diagrams

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>

* 🎨 style: fix code style issues with Black

* 🚧 wip: gather all data for doi registration payload

* 🚧 wip: correcting payload for doi registration

* 🎨 style: fix code style issues with Black

* ✨ feat: doi registration complete

* 🎨 style: fix code style issues with Black

---------

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: Sanjay Soundarajan <[email protected]>
Co-authored-by: Lint Action <[email protected]>
Co-authored-by: slugb0t <[email protected]>
  • Loading branch information
4 people authored Feb 27, 2024
1 parent 188f97b commit 3510fe6
Show file tree
Hide file tree
Showing 10 changed files with 579 additions and 54 deletions.
31 changes: 31 additions & 0 deletions .github/workflows/lint.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
name: Lint

on: [push]

jobs:
  lint:
    name: Run linters
    runs-on: ubuntu-latest
    strategy:
      matrix:
        python-version: ["3.11"]

    steps:
      # checkout@v2 / setup-python@v2 run on the deprecated Node 12
      # runtime and are no longer maintained; v4/v5 are the current
      # majors with the same inputs used here.
      - uses: actions/checkout@v4

      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}

      - name: Install dependencies
        run: pip install -r requirements.txt

      - name: Lint with flake8
        run: poe flake8

      - name: Type check with mypy
        run: poe typecheck

      - name: Lint with pylint
        run: poe pylint
11 changes: 11 additions & 0 deletions config.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
"""Configuration for the application."""

from os import environ
from pathlib import Path

Expand Down Expand Up @@ -32,5 +33,15 @@ def get_env(key, optional=False):
FAIRHUB_DATABASE_USER = get_env("FAIRHUB_DATABASE_USER", optional=True)
FAIRHUB_DATABASE_PORT = get_env("FAIRHUB_DATABASE_PORT", optional=True)

# Deployment environment name ("staging"/"dev" select the DataCite test
# API below).  No optional=True is passed, so presumably get_env treats
# this as required — confirm against get_env's definition above.
FAIRHUB_ENVIRONMENT = get_env("FAIRHUB_ENVIRONMENT")

# Credentials for the DataCite REST API; optional so environments that
# never register DOIs can still start up.
DATACITE_CREDENTIALS = get_env("DATACITE_CREDENTIALS", optional=True)

# Azure Blob Storage access (both required).
AZURE_STORAGE_ACCESS_KEY = get_env("AZURE_STORAGE_ACCESS_KEY")
AZURE_STORAGE_CONNECTION_STRING = get_env("AZURE_STORAGE_CONNECTION_STRING")

# Default to the production DataCite endpoint; overridden below for
# non-production environments.
DATACITE_API_URL = "https://api.datacite.org"

if FAIRHUB_ENVIRONMENT in ["staging", "dev"]:
    # Using the test environment for DataCite
    DATACITE_API_URL = "https://api.test.datacite.org"
19 changes: 16 additions & 3 deletions function_app.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
"""Azure Function App for ETL pipeline."""

import logging

import azure.functions as func

from utils import file_operations
from publish_pipeline.generate_high_level_metadata.generate_changelog import (
pipeline as generate_changelog_pipeline,
)
Expand All @@ -24,10 +25,12 @@
from publish_pipeline.generate_high_level_metadata.generate_study_description import (
pipeline as generate_study_description_pipeline,
)
from publish_pipeline.register_doi.register_doi import pipeline as register_doi_pipeline
from stage_one.env_sensor_pipeline import pipeline as stage_one_env_sensor_pipeline
from stage_one.img_identifier_pipeline import (
pipeline as stage_one_img_identifier_pipeline,
)
from utils import file_operations

app = func.FunctionApp()

Expand Down Expand Up @@ -166,6 +169,18 @@ def generate_discovery_metadata(req: func.HttpRequest) -> func.HttpResponse:
return func.HttpResponse("Failed", status_code=500, mimetype="application/json")


@app.route(route="register-doi", auth_level=func.AuthLevel.FUNCTION)
def register_doi(req: func.HttpRequest) -> func.HttpResponse:
    """HTTP trigger that registers a DOI for the study.

    Delegates all work to ``register_doi_pipeline`` and maps its outcome
    to a plain-text HTTP response: "Success" (200) when the pipeline
    completes, "Failed" (500) if it raises.
    """
    try:
        register_doi_pipeline()
        return func.HttpResponse("Success", status_code=200, mimetype="text/plain")
    except Exception:  # top-level handler boundary: report 500, never crash host
        # Use logging.exception (the file already imports logging) instead of
        # print() so the full traceback reaches the Azure Functions log stream.
        logging.exception("DOI registration failed")
        return func.HttpResponse("Failed", status_code=500, mimetype="text/plain")


@app.route(route="moving-folders", auth_level=func.AuthLevel.FUNCTION)
def moving_folders(req: func.HttpRequest) -> func.HttpResponse:
"""Moves the directories along with the files in the Azure Database."""
Expand All @@ -176,5 +191,3 @@ def moving_folders(req: func.HttpRequest) -> func.HttpResponse:
def copying_folders(req: func.HttpRequest) -> func.HttpResponse:
    """Copies the directories along with the files in the Azure Database.

    Thin HTTP wrapper: delegates request parsing and response building to
    the shared ``file_operations.file_operation`` helper, passing
    ``copy_directory`` as the operation to perform.
    """
    return file_operations.file_operation(file_operations.copy_directory, req)


Original file line number Diff line number Diff line change
Expand Up @@ -43,10 +43,14 @@ def pipeline():

identifier = {}

# todo: generating a random uuid for now
# todo: replace with the actual doi when we have it
# Get the dataset identifier
identifier["identifierValue"] = str(uuid.uuid4())
cur.execute(
"SELECT doi FROM version WHERE dataset_id = %s",
(dataset_id,),
)

doi = cur.fetchone()
identifier["identifierValue"] = doi[0]
identifier["identifierType"] = "DOI"

dataset_metadata["Identifier"] = identifier
Expand Down Expand Up @@ -487,19 +491,19 @@ def pipeline():
item = {}

item["funderName"] = funding_reference[0]

item["funderIdentifier"] = {}
item["funderIdentifier"]["funderIdentifierValue"] = funding_reference[1]

if funding_reference[2] is not None and funding_reference[2] != "":
item["funderIdentifier"]["funderIdentifierType"] = funding_reference[2]
if funding_reference[3] is not None and funding_reference[3] != "":
item["funderIdentifier"]["schemeURI"] = funding_reference[3]

item["awardNumber"] = {}
item["awardNumber"]["awardNumberValue"] = funding_reference[4]

if funding_reference[5] is not None and funding_reference[5] != "":
item["awardNumber"]["awardURI"] = funding_reference[5]

if funding_reference[6] is not None and funding_reference[6] != "":
item["awardTitle"] = funding_reference[6]

Expand Down Expand Up @@ -540,19 +544,19 @@ def pipeline():
for related_item_identifier in related_item_identifiers:
item_identifier = {}

item_identifier[
"relatedItemIdentifierValue"
] = related_item_identifier[0]
item_identifier[
"relatedItemIdentifierType"
] = related_item_identifier[1]
item_identifier["relatedItemIdentifierValue"] = (
related_item_identifier[0]
)
item_identifier["relatedItemIdentifierType"] = (
related_item_identifier[1]
)
if (
related_item_identifier[2] is not None
and related_item_identifier[2] != ""
):
item_identifier[
"relatedMetadataScheme"
] = related_item_identifier[2]
item_identifier["relatedMetadataScheme"] = (
related_item_identifier[2]
)
if (
related_item_identifier[3] is not None
and related_item_identifier[3] != ""
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ def pipeline():
pooled_data_folders.append(blob.name.split("/")[2])

# print(pooled_data_folders)
# Replacing the list of folders from DB with a hardcoded list
pooled_data_folders = ["ekg", "redcap_data", "oct"]

# Create a temporary folder on the local machine
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -49,25 +49,25 @@ def pipeline():
identification_module["OrgStudyIdInfo"] = {}

# Study Identifier
identification_module["OrgStudyIdInfo"][
"OrgStudyId"
] = primary_study_identification[0]
identification_module["OrgStudyIdInfo"]["OrgStudyId"] = (
primary_study_identification[0]
)
# Study Identifier Type
identification_module["OrgStudyIdInfo"][
"OrgStudyIdType"
] = primary_study_identification[1]
identification_module["OrgStudyIdInfo"]["OrgStudyIdType"] = (
primary_study_identification[1]
)

if primary_study_identification[2] and primary_study_identification[2] != "":
# Study Identifier Domain
identification_module["OrgStudyIdInfo"][
"OrgStudyIdDomain"
] = primary_study_identification[2]
identification_module["OrgStudyIdInfo"]["OrgStudyIdDomain"] = (
primary_study_identification[2]
)

if primary_study_identification[3] and primary_study_identification[3] != "":
# Study Identifier Link
identification_module["OrgStudyIdInfo"][
"OrgStudyIdLink"
] = primary_study_identification[3]
identification_module["OrgStudyIdInfo"]["OrgStudyIdLink"] = (
primary_study_identification[3]
)

# Get the secondary study identification metadata
cur.execute(
Expand Down Expand Up @@ -235,18 +235,18 @@ def pipeline():
design_module["DesignInfo"]["DesignAllocation"] = study_design[1]
design_module["DesignInfo"]["DesignInterventionModel"] = study_design[2]
if study_design[3] and study_design[3] != "":
design_module["DesignInfo"][
"DesignInterventionModelDescription"
] = study_design[3]
design_module["DesignInfo"]["DesignInterventionModelDescription"] = (
study_design[3]
)
design_module["DesignInfo"]["DesignPrimaryPurpose"] = study_design[4]

design_module["DesignInfo"]["DesignMaskingInfo"] = {}
design_module["DesignInfo"]["DesignMaskingInfo"][
"DesignMasking"
] = study_design[5]
design_module["DesignInfo"]["DesignMaskingInfo"][
"DesignMaskingDescription"
] = study_design[6]
design_module["DesignInfo"]["DesignMaskingInfo"]["DesignMasking"] = (
study_design[5]
)
design_module["DesignInfo"]["DesignMaskingInfo"]["DesignMaskingDescription"] = (
study_design[6]
)

design_module["DesignInfo"]["DesignMaskingInfo"]["DesignWhoMaskedList"] = []

Expand Down
Loading

0 comments on commit 3510fe6

Please sign in to comment.