Skip to content

Commit

Permalink
Fix #184, work around throttle sem creation race
Browse files Browse the repository at this point in the history
Adds a retry loop around OS_CountSemGetIdByName, because if this sem is
created by another app there may be some delay until the other app gets
to the point where it creates the sem.  This works around the race
condition.

A retry limit is also imposed so CF will not spin here forever.
  • Loading branch information
jphickey committed Nov 17, 2022
1 parent 281a941 commit 833fdbb
Show file tree
Hide file tree
Showing 3 changed files with 62 additions and 3 deletions.
22 changes: 22 additions & 0 deletions fsw/platform_inc/cf_platform_cfg.h
Original file line number Diff line number Diff line change
Expand Up @@ -326,6 +326,28 @@ typedef uint32 CF_TransactionSeq_t;
*/
#define CF_RCVMSG_TIMEOUT (100)

/**
* @brief Limits the number of retries to obtain the CF throttle sem
*
* @par Description
* If the CF throttle sem is not available during CF startup, the initialization
* will retry after a short delay.
*
* @sa CF_STARTUP_SEM_TASK_DELAY
*/
#define CF_STARTUP_SEM_MAX_RETRIES 25

/**
* @brief Number of milliseconds to wait if CF throttle sem is not available
*
* @par Description
* If the CF throttle sem is not available during CF startup, the initialization
* will delay for this period of time before trying again
*
* @sa CF_STARTUP_SEM_MAX_RETRIES
*/
#define CF_STARTUP_SEM_TASK_DELAY 100

/**
* \brief Mission specific version number
*
Expand Down
23 changes: 21 additions & 2 deletions fsw/src/cf_cfdp.c
Original file line number Diff line number Diff line change
Expand Up @@ -990,8 +990,27 @@ int32 CF_CFDP_InitEngine(void)

if (CF_AppData.config_table->chan[i].sem_name[0])
{
ret = OS_CountSemGetIdByName(&CF_AppData.engine.channels[i].sem_id,
CF_AppData.config_table->chan[i].sem_name);
/*
* There is a start up race condition because CFE starts all apps at the same time,
* and if this sem is instantiated by another app, it may not be created yet.
*
* Therefore if OSAL returns OS_ERR_NAME_NOT_FOUND, assume this is what is going
* on, delay a bit and try again.
*/
ret = OS_ERR_NAME_NOT_FOUND;
for (j = 0; j < CF_STARTUP_SEM_MAX_RETRIES; ++j)
{
ret = OS_CountSemGetIdByName(&CF_AppData.engine.channels[i].sem_id,
CF_AppData.config_table->chan[i].sem_name);

if (ret != OS_ERR_NAME_NOT_FOUND)
{
break;
}

OS_TaskDelay(CF_STARTUP_SEM_TASK_DELAY);
}

if (ret != OS_SUCCESS)
{
CFE_EVS_SendEvent(CF_EID_ERR_INIT_SEM, CFE_EVS_EventType_ERROR,
Expand Down
20 changes: 19 additions & 1 deletion unit-test/cf_cfdp_tests.c
Original file line number Diff line number Diff line change
Expand Up @@ -863,12 +863,30 @@ void Test_CF_CFDP_InitEngine(void)
UtAssert_INT32_EQ(CF_CFDP_InitEngine(), 0);
UtAssert_BOOL_TRUE(CF_AppData.engine.enabled);

/* failure of OS_CountSemGetIdByName */
/* failure of OS_CountSemGetIdByName for non-name reason */
UT_CFDP_SetupBasicTestState(UT_CF_Setup_NONE, NULL, NULL, NULL, NULL, &config);
config->chan[0].sem_name[0] = 'u';
UT_SetDefaultReturnValue(UT_KEY(OS_CountSemGetIdByName), OS_ERROR);
UtAssert_INT32_EQ(CF_CFDP_InitEngine(), OS_ERROR);
UtAssert_BOOL_FALSE(CF_AppData.engine.enabled);
UT_CF_AssertEventID(CF_EID_ERR_INIT_SEM);

/* Max retries of OS_CountSemGetIdByName - sem was never created at all */
UT_CFDP_SetupBasicTestState(UT_CF_Setup_NONE, NULL, NULL, NULL, NULL, &config);
config->chan[0].sem_name[0] = 'u';
UT_SetDefaultReturnValue(UT_KEY(OS_CountSemGetIdByName), OS_ERR_NAME_NOT_FOUND);
UtAssert_INT32_EQ(CF_CFDP_InitEngine(), OS_ERR_NAME_NOT_FOUND);
UtAssert_BOOL_FALSE(CF_AppData.engine.enabled);
UT_CF_AssertEventID(CF_EID_ERR_INIT_SEM);

/* Retry of OS_CountSemGetIdByName, when sem was created late, and thus
* got return OS_ERR_NAME_NOT_FOUND followed by OS_SUCCESS */
UT_CFDP_SetupBasicTestState(UT_CF_Setup_NONE, NULL, NULL, NULL, NULL, &config);
config->chan[0].sem_name[0] = 'u';
UT_SetDefaultReturnValue(UT_KEY(OS_CountSemGetIdByName), OS_SUCCESS);
UT_SetDeferredRetcode(UT_KEY(OS_CountSemGetIdByName), 1, OS_ERR_NAME_NOT_FOUND);
UtAssert_INT32_EQ(CF_CFDP_InitEngine(), 0);
UtAssert_BOOL_TRUE(CF_AppData.engine.enabled);

/* failure of CFE_SB_CreatePipe */
UT_CFDP_SetupBasicTestState(UT_CF_Setup_NONE, NULL, NULL, NULL, NULL, &config);
Expand Down

0 comments on commit 833fdbb

Please sign in to comment.