From 45fb6f9432c55444bf8e049f9d3490880347fcbe Mon Sep 17 00:00:00 2001 From: Neil Naveen <42328488+neilnaveen@users.noreply.github.com> Date: Fri, 12 Jul 2024 10:06:16 -0500 Subject: [PATCH] Improved the runtime of DiffVulnerabilityResults (#1091) - Improved the runtime of DiffVulnerabilityResults - Precomputed the existence of Sources, Packages, and Vulnerabilities, which brings the time complexity down from O(n^2) to O(n). --- internal/ci/vulnerability_result_diff.go | 52 ++++++++++++++++++------ 1 file changed, 40 insertions(+), 12 deletions(-) diff --git a/internal/ci/vulnerability_result_diff.go b/internal/ci/vulnerability_result_diff.go index 0f6bf95409a..bd260acde43 100644 --- a/internal/ci/vulnerability_result_diff.go +++ b/internal/ci/vulnerability_result_diff.go @@ -1,8 +1,6 @@ package ci import ( - "slices" - "github.com/google/osv-scanner/internal/output" "github.com/google/osv-scanner/pkg/grouper" "github.com/google/osv-scanner/pkg/models" @@ -10,13 +8,14 @@ import ( // DiffVulnerabilityResults will return any new vulnerabilities that are in `newRes` // which is not present in `oldRes`, but not the reverse. -// -// Current implementation is O(n^2) on the number of vulns, but can be reduced to linear time func DiffVulnerabilityResults(oldRes, newRes models.VulnerabilityResults) models.VulnerabilityResults { result := models.VulnerabilityResults{} + // Initialize caches for quick lookup + sourceToIndex, packageToIndex, vulnToIndex := initializeCaches(oldRes) + for _, ps := range newRes.Results { - sourceIdx := slices.IndexFunc(oldRes.Results, func(elem models.PackageSource) bool { return elem.Source == ps.Source }) - if sourceIdx == -1 { + sourceIdx, sourceExists := sourceToIndex[ps.Source] + if !sourceExists { // Newly introduced source, so all results for this source are going to be new, add everything for this source result.Results = append(result.Results, ps) continue @@ -27,9 +26,8 @@ func DiffVulnerabilityResults(oldRes, newRes models.VulnerabilityResults) models }) resultPS := &result.Results[len(result.Results)-1] for _, pv := range ps.Packages { - pkgs := oldRes.Results[sourceIdx].Packages - pkgIdx := slices.IndexFunc(pkgs, func(elem models.PackageVulns) bool { return elem.Package == pv.Package }) - if pkgIdx == -1 { + pkgIdx, packageExists := packageToIndex[sourceIdx][pv.Package] + if !packageExists { // Newly introduced package, so all results for this package are going to be new, add everything for this package resultPS.Packages = append(resultPS.Packages, pv) continue @@ -41,9 +39,7 @@ func DiffVulnerabilityResults(oldRes, newRes models.VulnerabilityResults) models }) resultPV := &resultPS.Packages[len(resultPS.Packages)-1] for _, v := range pv.Vulnerabilities { - vulns := pkgs[pkgIdx].Vulnerabilities - vulnIdx := slices.IndexFunc(vulns, func(elem models.Vulnerability) bool { return elem.ID == v.ID }) - if vulnIdx == -1 { + if !vulnToIndex[sourceIdx][pkgIdx][v.ID] { // Vulnerability is new, add it to the results resultPV.Vulnerabilities = append(resultPV.Vulnerabilities, v) continue @@ -71,6 +67,38 @@ func DiffVulnerabilityResults(oldRes, newRes models.VulnerabilityResults) models return result } +// initializeCaches sets up maps for quick lookup of sources, packages, and vulnerabilities by their indices. +func initializeCaches(oldRes models.VulnerabilityResults) (map[models.SourceInfo]int, []map[models.PackageInfo]int, [][]map[string]bool) { + sourceToIndex := make(map[models.SourceInfo]int, len(oldRes.Results)) + // The index in the array corresponds to a source index, a query would look like packageToIndex[sourceIndex][packageInfo] + packageToIndex := make([]map[models.PackageInfo]int, len(oldRes.Results)) + // The first index in the array corresponds to a source index, and the second index corresponds to a package index + // a query would look like vulnToIndex[sourceIndex][packageIndex][vulnID] + vulnToIndex := make([][]map[string]bool, len(oldRes.Results)) + + // Populate index maps for sources, packages, and vulnerabilities + for sourceIndex, vulnResult := range oldRes.Results { + sourceToIndex[oldRes.Results[sourceIndex].Source] = sourceIndex + if vulnToIndex[sourceIndex] == nil { + vulnToIndex[sourceIndex] = make([]map[string]bool, len(vulnResult.Packages)) + } + for packageIndex, pkg := range vulnResult.Packages { + if packageToIndex[sourceIndex] == nil { + packageToIndex[sourceIndex] = make(map[models.PackageInfo]int, len(vulnResult.Packages)) + } + packageToIndex[sourceIndex][pkg.Package] = packageIndex + if vulnToIndex[sourceIndex][packageIndex] == nil { + vulnToIndex[sourceIndex][packageIndex] = make(map[string]bool, len(pkg.Vulnerabilities)) + } + for _, vuln := range pkg.Vulnerabilities { + vulnToIndex[sourceIndex][packageIndex][vuln.ID] = true // Mark the vulnerability as present + } + } + } + + return sourceToIndex, packageToIndex, vulnToIndex +} + // DiffVulnerabilityResultsByOccurrences will return the occurrence of each vulnerability that are in `newRes` // which is not present in `oldRes`, but not the reverse. This calculates the difference by vulnerability ID, // while ignoring the source of the vulnerability.