Skip to content
This repository has been archived by the owner on Jun 6, 2024. It is now read-only.

[Rest Server] Change gpuType(s) to skuType(s) #4362

Merged
2 commits merged on
Apr 9, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion src/rest-server/src/config/v2/hived.js
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ const hivedSchema = {
'^[A-Za-z0-9._~]+$': {
type: 'object',
properties: {
gpuType: {
skuType: {
type: ['string', 'null'],
default: null,
},
Expand Down
11 changes: 9 additions & 2 deletions src/rest-server/src/config/vc.js
Original file line number Diff line number Diff line change
Expand Up @@ -48,8 +48,15 @@ const vcStatusPutInputSchema = Joi.object().keys({
const resourceUnits = {};

if (enabledHived) {
const hivedSpec = yaml.safeLoad(fs.readFileSync(hivedSpecPath));
for (let [key, val] of Object.entries(hivedSpec.physicalCluster.gpuTypes)) {
const hivedConfig = yaml.safeLoad(fs.readFileSync(hivedSpecPath));
if (!('physicalCluster' in hivedConfig &&
!!hivedConfig.physicalCluster.skuTypes &&
hivedConfig.physicalCluster.skuTypes.constructor === Object &&
Object.keys(hivedConfig.physicalCluster.skuTypes).length > 0)) {
throw new Error('Cannot find skuTypes in hivedscheduler config.');
}

for (let [key, val] of Object.entries(hivedConfig.physicalCluster.skuTypes)) {
resourceUnits[key] = {
cpu: k8s.atoi(val.cpu),
memory: k8s.convertMemoryMb(val.memory),
Expand Down
34 changes: 17 additions & 17 deletions src/rest-server/src/middlewares/v2/hived.js
Original file line number Diff line number Diff line change
Expand Up @@ -90,50 +90,50 @@ const hivedValidate = async (protocolObj, username) => {
throw createError(
'Bad Request',
'InvalidProtocolError',
`Hived error: ${taskRole} does not exist.`
`Taskrole ${taskRole} does not exist.`
);
}

const taskRoleConfig = hivedConfig.taskRoles[taskRole];
// at most one of [reservationId, gpuType] allowed
if (taskRoleConfig.reservationId !== null && taskRoleConfig.gpuType !== null) {
// at most one of [reservationId, skuType] allowed
if (taskRoleConfig.reservationId !== null && taskRoleConfig.skuType !== null) {
throw createError(
'Bad Request',
'InvalidProtocolError',
`Hived error: ${taskRole} has both reservationId and gpuType, only one allowed.`
`Taskrole ${taskRole} has both reservationId and skuType, only one allowed.`
);
}

if (taskRoleConfig.gpuType !== null && !(taskRoleConfig.gpuType in resourceUnits)) {
if (taskRoleConfig.skuType !== null && !(taskRoleConfig.skuType in resourceUnits)) {
throw createError(
'Bad Request',
'InvalidProtocolError',
`Hived error: ${taskRole} has unknown gpuType ${taskRoleConfig.gpuType}, allow ${Object.keys(resourceUnits)}.`
`Taskrole ${taskRole} has unknown skuType ${taskRoleConfig.skuType}, allow ${Object.keys(resourceUnits)}.`
);
}

const affinityGroupName = taskRoleConfig.affinityGroupName;
// affinityGroup should have uniform reservationId and gpuType
// affinityGroup should have uniform reservationId and skuType
if (affinityGroupName !== null) {
if (affinityGroupName in affinityGroups) {
if (taskRoleConfig.reservationId === null) {
taskRoleConfig.reservationId = affinityGroups[affinityGroupName].reservationId;
}
if (taskRoleConfig.gpuType === null) {
taskRoleConfig.gpuType = affinityGroups[affinityGroupName].gpuType;
if (taskRoleConfig.skuType === null) {
taskRoleConfig.skuType = affinityGroups[affinityGroupName].skuType;
}
if (taskRoleConfig.reservationId !== affinityGroups[affinityGroupName].reservationId ||
taskRoleConfig.gpuType !== affinityGroups[affinityGroupName].gpuType) {
taskRoleConfig.skuType !== affinityGroups[affinityGroupName].skuType) {
throw createError(
'Bad Request',
'InvalidProtocolError',
`Hived error: affinityGroup: ${affinityGroupName} has inconsistent gpuType or reservationId.`
`AffinityGroup ${affinityGroupName} has inconsistent skuType or reservationId.`
);
}
} else {
affinityGroups[affinityGroupName] = {
reservationId: taskRoleConfig.reservationId,
gpuType: taskRoleConfig.gpuType,
skuType: taskRoleConfig.skuType,
affinityTaskList: [],
};
}
Expand All @@ -146,12 +146,12 @@ const hivedValidate = async (protocolObj, username) => {
}

for (let affinityGroupName of Object.keys(affinityGroups)) {
if (affinityGroups[affinityGroupName].gpuType !== null &&
if (affinityGroups[affinityGroupName].skuType !== null &&
affinityGroups[affinityGroupName].reservationId !== null) {
throw createError(
'Bad Request',
'InvalidProtocolError',
`Hived error: affinityGroup: ${affinityGroupName} has both reservationId and gpuType, only one allowed.`
`AffinityGroup ${affinityGroupName} has both reservationId and skuType, only one allowed.`
);
}
}
Expand Down
Expand Up @@ -194,7 +194,7 @@ const hivedValidate = async (protocolObj, username) => {
affinityGroup: null,
};
if (hivedConfig && hivedConfig.taskRoles && taskRole in hivedConfig.taskRoles) {
podSpec.gpuType = hivedConfig.taskRoles[taskRole].gpuType;
podSpec.gpuType = hivedConfig.taskRoles[taskRole].skuType;
if (podSpec.gpuType !== null) {
for (const t of ['gpu', 'cpu', 'memory']) {
resourcePerCell[t] = resourceUnits[podSpec.gpuType][t];
Expand Down Expand Up @@ -222,7 +222,7 @@ const hivedValidate = async (protocolObj, username) => {
throw createError(
'Bad Request',
'InvalidProtocolError',
`Hived error: ${taskRole} requests ${gpu} GPU, ${cpu} CPU, ${memoryMB}MB memory; ` +
`Taskrole ${taskRole} requests ${gpu} GPU, ${cpu} CPU, ${memoryMB}MB memory; ` +
`sku allows ${resourcePerCell.gpu} GPU, ${resourcePerCell.cpu} CPU, ${resourcePerCell.memory}MB memory per cell.`
);
}
Expand All @@ -234,7 +234,7 @@ const hivedValidate = async (protocolObj, username) => {
throw createError(
'Bad Request',
'InvalidProtocolError',
`Hived error: exceed ${cellQuota} GPU quota in ${virtualCluster} VC.`
`Exceed ${cellQuota} GPU quota in ${virtualCluster} VC.`
);
}

Expand Down