Skip to content

Commit

Permalink
Improved the runtime of DiffVulnerabilityResults (#1091)
Browse files Browse the repository at this point in the history
- Improved the runtime of DiffVulnerabilityResults
- Precomputed the existence of Sources, Packages, and Vulnerabilities,
which brings the time complexity down from O(n^2) to O(n).
  • Loading branch information
neilnaveen authored Jul 12, 2024
1 parent 56c68b8 commit 45fb6f9
Showing 1 changed file with 40 additions and 12 deletions.
52 changes: 40 additions & 12 deletions internal/ci/vulnerability_result_diff.go
Original file line number Diff line number Diff line change
@@ -1,22 +1,21 @@
package ci

import (
"slices"

"github.com/google/osv-scanner/internal/output"
"github.com/google/osv-scanner/pkg/grouper"
"github.com/google/osv-scanner/pkg/models"
)

// DiffVulnerabilityResults will return any new vulnerabilities that are in `newRes`
// which is not present in `oldRes`, but not the reverse.
//
// Current implementation is O(n^2) on the number of vulns, but can be reduced to linear time
func DiffVulnerabilityResults(oldRes, newRes models.VulnerabilityResults) models.VulnerabilityResults {
result := models.VulnerabilityResults{}
// Initialize caches for quick lookup
sourceToIndex, packageToIndex, vulnToIndex := initializeCaches(oldRes)

for _, ps := range newRes.Results {
sourceIdx := slices.IndexFunc(oldRes.Results, func(elem models.PackageSource) bool { return elem.Source == ps.Source })
if sourceIdx == -1 {
sourceIdx, sourceExists := sourceToIndex[ps.Source]
if !sourceExists {
// Newly introduced source, so all results for this source are going to be new, add everything for this source
result.Results = append(result.Results, ps)
continue
Expand All @@ -27,9 +26,8 @@ func DiffVulnerabilityResults(oldRes, newRes models.VulnerabilityResults) models
})
resultPS := &result.Results[len(result.Results)-1]
for _, pv := range ps.Packages {
pkgs := oldRes.Results[sourceIdx].Packages
pkgIdx := slices.IndexFunc(pkgs, func(elem models.PackageVulns) bool { return elem.Package == pv.Package })
if pkgIdx == -1 {
pkgIdx, packageExists := packageToIndex[sourceIdx][pv.Package]
if !packageExists {
// Newly introduced package, so all results for this package are going to be new, add everything for this package
resultPS.Packages = append(resultPS.Packages, pv)
continue
Expand All @@ -41,9 +39,7 @@ func DiffVulnerabilityResults(oldRes, newRes models.VulnerabilityResults) models
})
resultPV := &resultPS.Packages[len(resultPS.Packages)-1]
for _, v := range pv.Vulnerabilities {
vulns := pkgs[pkgIdx].Vulnerabilities
vulnIdx := slices.IndexFunc(vulns, func(elem models.Vulnerability) bool { return elem.ID == v.ID })
if vulnIdx == -1 {
if !vulnToIndex[sourceIdx][pkgIdx][v.ID] {
// Vulnerability is new, add it to the results
resultPV.Vulnerabilities = append(resultPV.Vulnerabilities, v)
continue
Expand Down Expand Up @@ -71,6 +67,38 @@ func DiffVulnerabilityResults(oldRes, newRes models.VulnerabilityResults) models
return result
}

// initializeCaches sets up maps for quick lookup of sources, packages, and vulnerabilities by their indices.
func initializeCaches(oldRes models.VulnerabilityResults) (map[models.SourceInfo]int, []map[models.PackageInfo]int, [][]map[string]bool) {
sourceToIndex := make(map[models.SourceInfo]int, len(oldRes.Results))
// The index in the array corresponds to a source index, a query would look like packageToIndex[sourceIndex][packageInfo]
packageToIndex := make([]map[models.PackageInfo]int, len(oldRes.Results))
// The first index in the array corresponds to a source index, and the second index corresponds to a package index
// a query would look like vulnToIndex[sourceIndex][packageIndex][vulnID]
vulnToIndex := make([][]map[string]bool, len(oldRes.Results))

// Populate index maps for sources, packages, and vulnerabilities
for sourceIndex, vulnResult := range oldRes.Results {
sourceToIndex[oldRes.Results[sourceIndex].Source] = sourceIndex
if vulnToIndex[sourceIndex] == nil {
vulnToIndex[sourceIndex] = make([]map[string]bool, len(vulnResult.Packages))
}
for packageIndex, pkg := range vulnResult.Packages {
if packageToIndex[sourceIndex] == nil {
packageToIndex[sourceIndex] = make(map[models.PackageInfo]int, len(vulnResult.Packages))
}
packageToIndex[sourceIndex][pkg.Package] = packageIndex
if vulnToIndex[sourceIndex][packageIndex] == nil {
vulnToIndex[sourceIndex][packageIndex] = make(map[string]bool, len(pkg.Vulnerabilities))
}
for _, vuln := range pkg.Vulnerabilities {
vulnToIndex[sourceIndex][packageIndex][vuln.ID] = true // Mark the vulnerability as present
}
}
}

return sourceToIndex, packageToIndex, vulnToIndex
}

// DiffVulnerabilityResultsByOccurrences will return the occurrence of each vulnerability that are in `newRes`
// which is not present in `oldRes`, but not the reverse. This calculates the difference by vulnerability ID,
// while ignoring the source of the vulnerability.
Expand Down

0 comments on commit 45fb6f9

Please sign in to comment.