Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: execute components in parallel #366

Merged
merged 1 commit into from
Jan 21, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
41 changes: 32 additions & 9 deletions pkg/utils/dag.go
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,16 @@ func (d *dag) GetAncestorIDs(id string) []string {
return d.ancestorsMap[id]
}

func (d *dag) TopologicalSort() ([]*datamodel.Component, error) {
type topologicalSortNode struct {
comp *datamodel.Component
group int // the group order
}

// TopologicalSort returns the topologically sorted components.
// The result is a list of component groups; the components within
// each group can be executed in parallel.
func (d *dag) TopologicalSort() ([][]*datamodel.Component, error) {

if len(d.comps) == 0 {
return nil, fmt.Errorf("no components")
}
Expand All @@ -115,29 +124,43 @@ func (d *dag) TopologicalSort() ([]*datamodel.Component, error) {
}

}
q := []*datamodel.Component{}
q := []*topologicalSortNode{}
for _, comp := range d.comps {
if indegreesMap[comp] == 0 {
q = append(q, comp)
q = append(q, &topologicalSortNode{
comp: comp,
group: 0,
})
}
}

ans := []*datamodel.Component{}
ans := [][]*datamodel.Component{}

count := 0
taken := make(map[*datamodel.Component]bool)
for len(q) > 0 {
from := q[0]
q = q[1:]
ans = append(ans, from)
taken[from] = true
for _, to := range d.prerequisitesMap[from] {
if len(ans) <= from.group {
ans = append(ans, []*datamodel.Component{})
}
ans[from.group] = append(ans[from.group], from.comp)
count += 1
taken[from.comp] = true

for _, to := range d.prerequisitesMap[from.comp] {
indegreesMap[to]--
if indegreesMap[to] == 0 {
q = append(q, to)
q = append(q, &topologicalSortNode{
comp: to,
group: from.group + 1,
})
}
}

}

if len(ans) < len(d.comps) {
if count < len(d.comps) {
return nil, fmt.Errorf("not a valid dag")
}

Expand Down
98 changes: 50 additions & 48 deletions pkg/utils/utils.go
Original file line number Diff line number Diff line change
Expand Up @@ -138,64 +138,66 @@ func NewConnectorDataPoint(data ConnectorUsageMetricData, pipelineMetadata *stru
)
}

func GenerateTraces(comps []*datamodel.Component, memory []map[string]interface{}, status []map[string]*ComponentStatus, computeTime map[string]float32, batchSize int) (map[string]*pipelinePB.Trace, error) {
func GenerateTraces(comps [][]*datamodel.Component, memory []map[string]interface{}, status []map[string]*ComponentStatus, computeTime map[string]float32, batchSize int) (map[string]*pipelinePB.Trace, error) {
trace := map[string]*pipelinePB.Trace{}
for compIdx := range comps {
inputs := []*structpb.Struct{}
outputs := []*structpb.Struct{}
var traceStatuses []pipelinePB.Trace_Status
for dataIdx := 0; dataIdx < batchSize; dataIdx++ {
if status[dataIdx][comps[compIdx].ID].Completed {
traceStatuses = append(traceStatuses, pipelinePB.Trace_STATUS_COMPLETED)
} else if status[dataIdx][comps[compIdx].ID].Skipped {
traceStatuses = append(traceStatuses, pipelinePB.Trace_STATUS_SKIPPED)
} else if status[dataIdx][comps[compIdx].ID].Error {
traceStatuses = append(traceStatuses, pipelinePB.Trace_STATUS_ERROR)
} else {
traceStatuses = append(traceStatuses, pipelinePB.Trace_STATUS_UNSPECIFIED)
}

}

if comps[compIdx].DefinitionName != "operator-definitions/2ac8be70-0f7a-4b61-a33d-098b8acfa6f3" &&
comps[compIdx].DefinitionName != "operator-definitions/4f39c8bc-8617-495d-80de-80d0f5397516" {
for groupIdx := range comps {
for compIdx := range comps[groupIdx] {
inputs := []*structpb.Struct{}
outputs := []*structpb.Struct{}
var traceStatuses []pipelinePB.Trace_Status
for dataIdx := 0; dataIdx < batchSize; dataIdx++ {
if _, ok := memory[dataIdx][comps[compIdx].ID].(map[string]interface{})["input"]; ok {
data, err := json.Marshal(memory[dataIdx][comps[compIdx].ID].(map[string]interface{})["input"])
if err != nil {
return nil, err
}
inputStruct := &structpb.Struct{}
err = protojson.Unmarshal(data, inputStruct)
if err != nil {
return nil, err
}
inputs = append(inputs, inputStruct)
if status[dataIdx][comps[groupIdx][compIdx].ID].Completed {
traceStatuses = append(traceStatuses, pipelinePB.Trace_STATUS_COMPLETED)
} else if status[dataIdx][comps[groupIdx][compIdx].ID].Skipped {
traceStatuses = append(traceStatuses, pipelinePB.Trace_STATUS_SKIPPED)
} else if status[dataIdx][comps[groupIdx][compIdx].ID].Error {
traceStatuses = append(traceStatuses, pipelinePB.Trace_STATUS_ERROR)
} else {
traceStatuses = append(traceStatuses, pipelinePB.Trace_STATUS_UNSPECIFIED)
}

}
for dataIdx := 0; dataIdx < batchSize; dataIdx++ {
if _, ok := memory[dataIdx][comps[compIdx].ID].(map[string]interface{})["output"]; ok {
data, err := json.Marshal(memory[dataIdx][comps[compIdx].ID].(map[string]interface{})["output"])
if err != nil {
return nil, err
}
outputStruct := &structpb.Struct{}
err = protojson.Unmarshal(data, outputStruct)
if err != nil {
return nil, err

if comps[groupIdx][compIdx].DefinitionName != "operator-definitions/2ac8be70-0f7a-4b61-a33d-098b8acfa6f3" &&
comps[groupIdx][compIdx].DefinitionName != "operator-definitions/4f39c8bc-8617-495d-80de-80d0f5397516" {
for dataIdx := 0; dataIdx < batchSize; dataIdx++ {
if _, ok := memory[dataIdx][comps[groupIdx][compIdx].ID].(map[string]interface{})["input"]; ok {
data, err := json.Marshal(memory[dataIdx][comps[groupIdx][compIdx].ID].(map[string]interface{})["input"])
if err != nil {
return nil, err
}
inputStruct := &structpb.Struct{}
err = protojson.Unmarshal(data, inputStruct)
if err != nil {
return nil, err
}
inputs = append(inputs, inputStruct)
}
outputs = append(outputs, outputStruct)

}
for dataIdx := 0; dataIdx < batchSize; dataIdx++ {
if _, ok := memory[dataIdx][comps[groupIdx][compIdx].ID].(map[string]interface{})["output"]; ok {
data, err := json.Marshal(memory[dataIdx][comps[groupIdx][compIdx].ID].(map[string]interface{})["output"])
if err != nil {
return nil, err
}
outputStruct := &structpb.Struct{}
err = protojson.Unmarshal(data, outputStruct)
if err != nil {
return nil, err
}
outputs = append(outputs, outputStruct)
}

}
}
}

trace[comps[compIdx].ID] = &pipelinePB.Trace{
Statuses: traceStatuses,
Inputs: inputs,
Outputs: outputs,
ComputeTimeInSeconds: computeTime[comps[compIdx].ID],
trace[comps[groupIdx][compIdx].ID] = &pipelinePB.Trace{
Statuses: traceStatuses,
Inputs: inputs,
Outputs: outputs,
ComputeTimeInSeconds: computeTime[comps[groupIdx][compIdx].ID],
}
}
}
return trace, nil
Expand Down
4 changes: 2 additions & 2 deletions pkg/worker/worker.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,8 @@ const TaskQueue = "pipeline-backend"
// Worker interface
type Worker interface {
TriggerPipelineWorkflow(ctx workflow.Context, param *TriggerPipelineWorkflowRequest) (*TriggerPipelineWorkflowResponse, error)
ConnectorActivity(ctx context.Context, param *ExecuteConnectorActivityRequest) (*ExecuteConnectorActivityResponse, error)
OperatorActivity(ctx context.Context, param *ExecuteOperatorActivityRequest) (*ExecuteOperatorActivityResponse, error)
ConnectorActivity(ctx context.Context, param *ExecuteConnectorActivityRequest) (*ExecuteActivityResponse, error)
OperatorActivity(ctx context.Context, param *ExecuteOperatorActivityRequest) (*ExecuteActivityResponse, error)
}

// worker represents resources required to run Temporal workflow and activity
Expand Down
Loading
Loading