Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: ✨ doi registration #9

Merged
merged 14 commits into from
Feb 27, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 31 additions & 0 deletions .github/workflows/lint.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
# Lint workflow: runs flake8, mypy and pylint on every push.
name: Lint

on: [push]

jobs:
  lint:
    name: Run linters
    runs-on: ubuntu-latest
    strategy:
      matrix:
        # Quoted so YAML keeps the version as a string rather than a float.
        python-version: ["3.11"]

    steps:
      # Current major versions; the v2 actions run on a retired Node runtime.
      - uses: actions/checkout@v4

      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}

      - name: Install dependencies
        run: pip install -r requirements.txt

      # NOTE(review): the `poe` tasks below presumably come from poethepoet
      # being listed in requirements.txt with tasks defined in the project
      # configuration - confirm.
      - name: Lint with flake8
        run: poe flake8

      - name: Type check with mypy
        run: poe typecheck

      - name: Lint with pylint
        run: poe pylint
11 changes: 11 additions & 0 deletions config.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
"""Configuration for the application."""

from os import environ
from pathlib import Path

Expand Down Expand Up @@ -32,5 +33,15 @@ def get_env(key, optional=False):
FAIRHUB_DATABASE_USER = get_env("FAIRHUB_DATABASE_USER", optional=True)
FAIRHUB_DATABASE_PORT = get_env("FAIRHUB_DATABASE_PORT", optional=True)

# Name of the deployment environment; used below to pick the DataCite host.
FAIRHUB_ENVIRONMENT = get_env("FAIRHUB_ENVIRONMENT")

DATACITE_CREDENTIALS = get_env("DATACITE_CREDENTIALS", optional=True)

AZURE_STORAGE_ACCESS_KEY = get_env("AZURE_STORAGE_ACCESS_KEY")
AZURE_STORAGE_CONNECTION_STRING = get_env("AZURE_STORAGE_CONNECTION_STRING")

# "staging" and "dev" use the DataCite test API; any other environment
# value uses the production API.
DATACITE_API_URL = (
    "https://api.test.datacite.org"
    if FAIRHUB_ENVIRONMENT in ("staging", "dev")
    else "https://api.datacite.org"
)
19 changes: 16 additions & 3 deletions function_app.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
"""Azure Function App for ETL pipeline."""

import logging

import azure.functions as func

from utils import file_operations
from publish_pipeline.generate_high_level_metadata.generate_changelog import (
pipeline as generate_changelog_pipeline,
)
Expand All @@ -24,10 +25,12 @@
from publish_pipeline.generate_high_level_metadata.generate_study_description import (
pipeline as generate_study_description_pipeline,
)
from publish_pipeline.register_doi.register_doi import pipeline as register_doi_pipeline
from stage_one.env_sensor_pipeline import pipeline as stage_one_env_sensor_pipeline
from stage_one.img_identifier_pipeline import (
pipeline as stage_one_img_identifier_pipeline,
)
from utils import file_operations

app = func.FunctionApp()

Expand Down Expand Up @@ -166,6 +169,18 @@ def generate_discovery_metadata(req: func.HttpRequest) -> func.HttpResponse:
return func.HttpResponse("Failed", status_code=500, mimetype="application/json")


@app.route(route="register-doi", auth_level=func.AuthLevel.FUNCTION)
def register_doi(req: func.HttpRequest) -> func.HttpResponse:
    """Run the DOI registration pipeline and report the outcome over HTTP.

    Args:
        req: The incoming HTTP request; it is not inspected here.

    Returns:
        A plain-text "Success" response with status 200 when the pipeline
        completes, or a plain-text "Failed" response with status 500 when
        the pipeline raises.
    """
    try:
        register_doi_pipeline()
    except Exception:
        # Top-level boundary: keep the broad catch so the caller always gets
        # an HTTP response, but record the full traceback through logging
        # instead of printing only the exception message.
        logging.exception("DOI registration pipeline failed")
        return func.HttpResponse("Failed", status_code=500, mimetype="text/plain")

    return func.HttpResponse("Success", status_code=200, mimetype="text/plain")


@app.route(route="moving-folders", auth_level=func.AuthLevel.FUNCTION)
def moving_folders(req: func.HttpRequest) -> func.HttpResponse:
"""Moves the directories along with the files in the Azure Database."""
Expand All @@ -176,5 +191,3 @@ def moving_folders(req: func.HttpRequest) -> func.HttpResponse:
def copying_folders(req: func.HttpRequest) -> func.HttpResponse:
    """Delegate the request to file_operations using copy_directory.

    The request is passed unchanged to ``file_operations.file_operation``
    together with ``file_operations.copy_directory``; whatever that call
    returns is returned to the caller.
    """
    operation = file_operations.copy_directory
    return file_operations.file_operation(operation, req)


Original file line number Diff line number Diff line change
Expand Up @@ -43,10 +43,14 @@ def pipeline():

identifier = {}

# todo: generating a random uuid for now
# todo: replace with the actual doi when we have it
# Get the dataset identifier
identifier["identifierValue"] = str(uuid.uuid4())
cur.execute(
"SELECT doi FROM version WHERE dataset_id = %s",
(dataset_id,),
)

doi = cur.fetchone()
identifier["identifierValue"] = doi[0]
identifier["identifierType"] = "DOI"

dataset_metadata["Identifier"] = identifier
Expand Down Expand Up @@ -487,19 +491,19 @@ def pipeline():
item = {}

item["funderName"] = funding_reference[0]

item["funderIdentifier"] = {}
item["funderIdentifier"]["funderIdentifierValue"] = funding_reference[1]

if funding_reference[2] is not None and funding_reference[2] != "":
item["funderIdentifier"]["funderIdentifierType"] = funding_reference[2]
if funding_reference[3] is not None and funding_reference[3] != "":
item["funderIdentifier"]["schemeURI"] = funding_reference[3]

item["awardNumber"] = {}
item["awardNumber"]["awardNumberValue"] = funding_reference[4]

if funding_reference[5] is not None and funding_reference[5] != "":
item["awardNumber"]["awardURI"] = funding_reference[5]

if funding_reference[6] is not None and funding_reference[6] != "":
item["awardTitle"] = funding_reference[6]

Expand Down Expand Up @@ -540,19 +544,19 @@ def pipeline():
for related_item_identifier in related_item_identifiers:
item_identifier = {}

item_identifier[
"relatedItemIdentifierValue"
] = related_item_identifier[0]
item_identifier[
"relatedItemIdentifierType"
] = related_item_identifier[1]
item_identifier["relatedItemIdentifierValue"] = (
related_item_identifier[0]
)
item_identifier["relatedItemIdentifierType"] = (
related_item_identifier[1]
)
if (
related_item_identifier[2] is not None
and related_item_identifier[2] != ""
):
item_identifier[
"relatedMetadataScheme"
] = related_item_identifier[2]
item_identifier["relatedMetadataScheme"] = (
related_item_identifier[2]
)
if (
related_item_identifier[3] is not None
and related_item_identifier[3] != ""
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ def pipeline():
pooled_data_folders.append(blob.name.split("/")[2])

# print(pooled_data_folders)
# Replacing the list of folders from DB with a hardcoded list
pooled_data_folders = ["ekg", "redcap_data", "oct"]

# Create a temporary folder on the local machine
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -49,25 +49,25 @@ def pipeline():
identification_module["OrgStudyIdInfo"] = {}

# Study Identifier
identification_module["OrgStudyIdInfo"][
"OrgStudyId"
] = primary_study_identification[0]
identification_module["OrgStudyIdInfo"]["OrgStudyId"] = (
primary_study_identification[0]
)
# Study Identifier Type
identification_module["OrgStudyIdInfo"][
"OrgStudyIdType"
] = primary_study_identification[1]
identification_module["OrgStudyIdInfo"]["OrgStudyIdType"] = (
primary_study_identification[1]
)

if primary_study_identification[2] and primary_study_identification[2] != "":
# Study Identifier Domain
identification_module["OrgStudyIdInfo"][
"OrgStudyIdDomain"
] = primary_study_identification[2]
identification_module["OrgStudyIdInfo"]["OrgStudyIdDomain"] = (
primary_study_identification[2]
)

if primary_study_identification[3] and primary_study_identification[3] != "":
# Study Identifier Link
identification_module["OrgStudyIdInfo"][
"OrgStudyIdLink"
] = primary_study_identification[3]
identification_module["OrgStudyIdInfo"]["OrgStudyIdLink"] = (
primary_study_identification[3]
)

# Get the secondary study identification metadata
cur.execute(
Expand Down Expand Up @@ -235,18 +235,18 @@ def pipeline():
design_module["DesignInfo"]["DesignAllocation"] = study_design[1]
design_module["DesignInfo"]["DesignInterventionModel"] = study_design[2]
if study_design[3] and study_design[3] != "":
design_module["DesignInfo"][
"DesignInterventionModelDescription"
] = study_design[3]
design_module["DesignInfo"]["DesignInterventionModelDescription"] = (
study_design[3]
)
design_module["DesignInfo"]["DesignPrimaryPurpose"] = study_design[4]

design_module["DesignInfo"]["DesignMaskingInfo"] = {}
design_module["DesignInfo"]["DesignMaskingInfo"][
"DesignMasking"
] = study_design[5]
design_module["DesignInfo"]["DesignMaskingInfo"][
"DesignMaskingDescription"
] = study_design[6]
design_module["DesignInfo"]["DesignMaskingInfo"]["DesignMasking"] = (
study_design[5]
)
design_module["DesignInfo"]["DesignMaskingInfo"]["DesignMaskingDescription"] = (
study_design[6]
)

design_module["DesignInfo"]["DesignMaskingInfo"]["DesignWhoMaskedList"] = []

Expand Down
Loading
Loading