Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

🐛 Bugfix: properly handle timeout when copying project #2655

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions api/specs/webserver/openapi-projects.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,12 @@ paths:
schema:
type: string
description: "Option to create a template from existing project: as_template={study_uuid}"
- name: copy_data
in: query
schema:
type: boolean
default: True
description: "Option to copy data when creating from an existing template or as a template, defaults to True"
- name: hidden
in: query
schema:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6887,6 +6887,12 @@ paths:
schema:
type: string
description: 'Option to create a template from existing project: as_template={study_uuid}'
- name: copy_data
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I guess this option should help implementing ITISFoundation/osparc-issues#560

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

it is a first step

in: query
schema:
type: boolean
default: true
description: 'Option to copy data when creating from an existing template or as a template, defaults to True'
- name: hidden
in: query
schema:
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from typing import Dict, Optional, Union
from typing import Optional, Union

import aioredlock
from aiohttp import web
Expand All @@ -9,6 +9,8 @@
get_redis_lock_manager_client,
)

from ..users_api import UserNameDict

PROJECT_REDIS_LOCK_KEY: str = "project:{}"

ProjectLock = aioredlock.Lock
Expand All @@ -20,7 +22,7 @@ async def lock_project(
project_uuid: Union[str, ProjectID],
status: ProjectStatus,
user_id: int,
user_name: Dict[str, str],
user_name: UserNameDict,
) -> ProjectLock:
"""returns a distributed redis lock on the project defined by its UUID.
NOTE: can be used as a context manager
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
"""
# pylint: disable=too-many-arguments

import asyncio
import contextlib
import json
import logging
Expand Down Expand Up @@ -52,7 +53,12 @@
)
from ..users_api import get_user_name, is_user_guest
from .config import CONFIG_SECTION_NAME
from .project_lock import ProjectLockError, get_project_locked_state, lock_project
from .project_lock import (
ProjectLockError,
UserNameDict,
get_project_locked_state,
lock_project,
)
from .projects_db import APP_PROJECT_DBAPI, ProjectDBAPI
from .projects_utils import extract_dns_without_default_port

Expand All @@ -65,9 +71,11 @@ def _is_node_dynamic(node_key: str) -> bool:
return "/dynamic/" in node_key


def validate_project(app: web.Application, project: Dict):
async def validate_project(app: web.Application, project: Dict):
project_schema = app[APP_JSONSCHEMA_SPECS_KEY][CONFIG_SECTION_NAME]
validate_instance(project, project_schema) # TODO: handl
await asyncio.get_event_loop().run_in_executor(
sanderegg marked this conversation as resolved.
Show resolved Hide resolved
None, validate_instance, project, project_schema
)


async def get_project_for_user(
Expand All @@ -80,7 +88,7 @@ async def get_project_for_user(
) -> Dict:
"""Returns a VALID project accessible to user

:raises web.HTTPNotFound: if no match found
:raises ProjectNotFoundError: if no match found
:return: schema-compliant project data
:rtype: Dict
"""
Expand All @@ -102,7 +110,7 @@ async def get_project_for_user(

# TODO: how to handle when database has an invalid project schema???
# Notice that db model does not include a check on project schema.
validate_project(app, project)
await validate_project(app, project)
return project


Expand Down Expand Up @@ -184,13 +192,13 @@ async def start_project_interactive_services(


async def delete_project(app: web.Application, project_uuid: str, user_id: int) -> None:
await delete_project_from_db(app, project_uuid, user_id)
await _delete_project_from_db(app, project_uuid, user_id)

async def _remove_services_and_data():
await remove_project_interactive_services(
user_id, project_uuid, app, notify_users=False
)
await delete_project_data(app, project_uuid, user_id)
await delete_data_folders_of_project(app, project_uuid, user_id)

fire_and_forget_task(_remove_services_and_data())

Expand Down Expand Up @@ -231,7 +239,7 @@ async def lock_with_notification(
project_uuid: str,
status: ProjectStatus,
user_id: int,
user_name: Dict[str, str],
user_name: UserNameDict,
notify_users: bool = True,
):
try:
Expand All @@ -242,40 +250,58 @@ async def lock_with_notification(
user_id,
user_name,
):
log.debug(
"Project [%s] lock acquired",
project_uuid,
)
if notify_users:
await retrieve_and_notify_project_locked_state(
user_id, project_uuid, app
)
yield

log.debug(
"Project [%s] lock released",
project_uuid,
)
except ProjectLockError:
# someone else has already the lock?
prj_states: ProjectState = await get_project_states_for_user(
user_id, project_uuid, app
)
log.error(
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

TIP: for errors (specially those we need to debug with support) we need good info. Having variable names is more handy and easily achievable using new f-strings in 3.8

            log.error(
            "Project %s already locked in state %s. Please check with support.",
            f"{project_uuid=}",
            f"{prj_states.locked.status=}",
            )

Copy link
Member Author

@sanderegg sanderegg Nov 22, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

hmm not sure I see the point here. I will get the name of the variable on top?
well I think the message already gives it out right? But I keep that formating for later. I find the f-string on top of the '%s' thing a bit much. looking forward to passing f-strings directly into the log message instead.

"Project [%s] already locked in state '%s'. Please check with support.",
project_uuid,
prj_states.locked.status,
)
raise
finally:
if notify_users:
await retrieve_and_notify_project_locked_state(user_id, project_uuid, app)


async def remove_project_interactive_services(
user_id: int, project_uuid: str, app: web.Application, notify_users: bool = True
user_id: int,
project_uuid: str,
app: web.Application,
notify_users: bool = True,
user_name: Optional[UserNameDict] = None,
) -> None:
# NOTE: during the closing process, which might take awhile,
# the project is locked so no one opens it at the same time
log.debug(
"removing project interactive services for project [%s] and user [%s]",
project_uuid,
user_id,
)
try:
log.debug(
"removing project interactive services for project [%s] and user [%s]",
project_uuid,
user_id,
)
async with await lock_project(
async with lock_with_notification(
app,
project_uuid,
ProjectStatus.CLOSING,
user_id,
await get_user_name(app, user_id),
user_name or await get_user_name(app, user_id),
notify_users=notify_users,
):
if notify_users:
await retrieve_and_notify_project_locked_state(
user_id, project_uuid, app
)

# save the state if the user is not a guest. if we do not know we save in any case.
with suppress(director_v2_api.DirectorServiceError):
# here director exceptions are suppressed. in case the service is not found to preserve old behavior
Expand All @@ -288,40 +314,16 @@ async def remove_project_interactive_services(
else True,
)
except ProjectLockError:
# maybe the someone else is already closing
prj_states: ProjectState = await get_project_states_for_user(
user_id, project_uuid, app
)
if prj_states.locked.status not in [
ProjectStatus.CLOSED,
ProjectStatus.CLOSING,
]:
log.error(
"lock for project [%s] was already taken, current state is %s. project could not be closed please check.",
project_uuid,
prj_states.locked.status,
)
finally:
# notify when done and the project is closed
if notify_users:
await retrieve_and_notify_project_locked_state(user_id, project_uuid, app)
pass


async def delete_project_data(
app: web.Application, project_uuid: str, user_id: int
) -> None:
# requests storage to delete all project's stored data
await delete_data_folders_of_project(app, project_uuid, user_id)


async def delete_project_from_db(
async def _delete_project_from_db(
app: web.Application, project_uuid: str, user_id: int
) -> None:
log.debug("deleting project '%s' for user '%s' in database", project_uuid, user_id)
db = app[APP_PROJECT_DBAPI]
await director_v2_api.delete_pipeline(app, user_id, UUID(project_uuid))
await db.delete_user_project(user_id, project_uuid)
# requests storage to delete all project's stored data
await delete_data_folders_of_project(app, project_uuid, user_id)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@GitHK please notice this change since it points one of the main source of the problem

  • First, the function ends with _from_db and this call triggers the storage API (which takes very long to repond).
  • Second, the same function was again called via delete_project_data in delete_project ... which means that storage gets two heavy delete calls



## PROJECT NODES -----------------------------------------------------
Expand Down Expand Up @@ -647,17 +649,15 @@ async def try_open_project_for_user(
user_id: int, project_uuid: str, client_session_id: str, app: web.Application
) -> bool:
try:
async with await lock_project(
async with lock_with_notification(
app,
project_uuid,
ProjectStatus.OPENING,
user_id,
await get_user_name(app, user_id),
notify_users=False,
):
log.debug(
"project [%s] lock acquired, now checking if project is available",
project_uuid,
)

with managed_resource(user_id, client_session_id, app) as rt:
user_session_id_list: List[
UserSessionID
Expand Down Expand Up @@ -769,7 +769,7 @@ async def _get_project_lock_state(
project_uuid,
set_user_ids,
)
usernames: List[Dict[str, str]] = [
usernames: List[UserNameDict] = [
await get_user_name(app, uid) for uid in set_user_ids
]
# let's check if the project is opened by the same user, maybe already opened or closed in a orphaned session
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
from aiopg.sa.connection import SAConnection
from aiopg.sa.result import RowProxy
from change_case import ChangeCase
from models_library.projects import ProjectAtDB
from models_library.projects import ProjectAtDB, ProjectIDStr
from pydantic import ValidationError
from pydantic.types import PositiveInt
from servicelib.aiohttp.application_keys import APP_DB_ENGINE_KEY
Expand Down Expand Up @@ -815,19 +815,26 @@ async def list_all_projects_by_uuid_for_user(self, user_id: int) -> List[str]:
result.append(row[0])
return list(result)

async def update_project_without_enforcing_checks(
self, project_data: Dict, project_uuid: str
async def update_project_without_checking_permissions(
self,
project_data: Dict,
project_uuid: ProjectIDStr,
*,
hidden: Optional[bool] = None,
) -> bool:
"""The garbage collector needs to alter the row without passing through the
permissions layer."""
async with self.engine.acquire() as conn:
# update timestamps
project_data["lastChangeDate"] = now_str()
# now update it
updated_values = _convert_to_db_names(project_data)
sanderegg marked this conversation as resolved.
Show resolved Hide resolved
if hidden is not None:
updated_values["hidden"] = hidden
result = await conn.execute(
# pylint: disable=no-value-for-parameter
projects.update()
.values(**_convert_to_db_names(project_data))
.values(**updated_values)
.where(projects.c.uuid == project_uuid)
)
return result.rowcount == 1
Expand Down
Loading